/*
punycode.c from RFC 3492
http://www.nicemice.net/idn/
Adam M. Costello
http://www.nicemice.net/amc/

This is ANSI C code (C89) implementing Punycode (RFC 3492).



C. Disclaimer and license

    Regarding this entire document or any portion of it (including
    the pseudocode and C code), the author makes no guarantees and
    is not responsible for any damage resulting from its use.  The
    author grants irrevocable permission to anyone to use, modify,
    and distribute it in any way that does not diminish the rights
    of anyone else to use, modify, and distribute it, provided that
    redistributed derivative works do not contain misleading author or
    version information.  Derivative works need not be licensed under
    similar terms.
*/

/* Include guard: the enum and typedef below must not be defined twice */
/* when this interface is pulled in through more than one header.      */
#ifndef PUNYCODE_H_INCLUDED
#define PUNYCODE_H_INCLUDED

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/************************************************************/
/* Public interface (would normally go in its own .h file): */

#include <limits.h>  /* UINT_MAX, used to select punycode_uint below */

/* Result codes returned by punycode_encode() and punycode_decode(). */
enum punycode_status {
  punycode_success,
  punycode_bad_input,   /* Input is invalid.                       */
  punycode_big_output,  /* Output would exceed the space provided. */
  punycode_overflow     /* Input needs wider integers to process.  */
};

/* punycode_uint is an unsigned integer type with at least 26 value */
/* bits: unsigned int when it is wide enough, otherwise unsigned    */
/* long.                                                            */
#if UINT_MAX >= (1 << 26) - 1
typedef unsigned int punycode_uint;
#else
typedef unsigned long punycode_uint;
#endif

enum punycode_status punycode_encode(
  punycode_uint input_length,
  const punycode_uint input[],
  const unsigned char case_flags[],
  punycode_uint *output_length,
  char output[] );

    /* punycode_encode() converts Unicode to Punycode.  The input     */
    /* is represented as an array of Unicode code points (not code    */
    /* units; surrogate pairs are not allowed), and the output        */
    /* will be represented as an array of ASCII code points.  The     */
    /* output string is *not* null-terminated; it will contain        */
    /* zeros if and only if the input contains zeros.  (Of course     */
    /* the caller can leave room for a terminator and add one if      */
    /* needed.)  The input_length is the number of code points in     */
    /* the input.  The output_length is an in/out argument: the       */
    /* caller passes in the maximum number of code points that it     */
    /* can receive, and on successful return it will contain the      */
    /* number of code points actually output.  The case_flags array   */
    /* holds input_length boolean values, where nonzero suggests that */
    /* the corresponding Unicode character be forced to uppercase     */
    /* after being decoded (if possible), and zero suggests that      */
    /* it be forced to lowercase (if possible).  ASCII code points    */
    /* are encoded literally, except that ASCII letters are forced    */
    /* to uppercase or lowercase according to the corresponding       */
    /* uppercase flags.  If case_flags is a null pointer then ASCII   */
    /* letters are left as they are, and other code points are        */
    /* treated as if their uppercase flags were zero.  The return     */
    /* value can be any of the punycode_status values defined above   */
    /* except punycode_bad_input; if not punycode_success, then       */
    /* output_length and output might contain garbage.                */

enum punycode_status punycode_decode(
  punycode_uint input_length,
  const char input[],
  punycode_uint *output_length,
  punycode_uint output[],
  unsigned char case_flags[] );

    /* punycode_decode() converts Punycode to Unicode.  The input is  */
    /* represented as an array of ASCII code points, and the output   */
    /* will be represented as an array of Unicode code points.  The   */
    /* input_length is the number of code points in the input.  The   */
    /* output_length is an in/out argument: the caller passes in      */
    /* the maximum number of code points that it can receive, and     */
    /* on successful return it will contain the actual number of      */
    /* code points output.  The case_flags array needs room for at    */
    /* least output_length values, or it can be a null pointer if the */
    /* case information is not needed.  A nonzero flag suggests that  */
    /* the corresponding Unicode character be forced to uppercase     */
    /* by the caller (if possible), while zero suggests that it be    */
    /* forced to lowercase (if possible).  ASCII code points are      */
    /* output already in the proper case, but their flags will be set */
    /* appropriately so that applying the flags would be harmless.    */
    /* The return value can be any of the punycode_status values      */
    /* defined above; if not punycode_success, then output_length,    */
    /* output, and case_flags might contain garbage.  On success, the */
    /* decoder will never need to write an output_length greater than */
    /* input_length, because of how the encoding is defined.          */

#ifdef __cplusplus
}
#endif /* __cplusplus */

#endif /* PUNYCODE_H_INCLUDED */