/*
punycode.c from RFC 3492
http://www.nicemice.net/idn/
Adam M. Costello
http://www.nicemice.net/amc/

This is ANSI C code (C89) implementing Punycode (RFC 3492).



C. Disclaimer and license

Regarding this entire document or any portion of it (including
the pseudocode and C code), the author makes no guarantees and
is not responsible for any damage resulting from its use.  The
author grants irrevocable permission to anyone to use, modify,
and distribute it in any way that does not diminish the rights
of anyone else to use, modify, and distribute it, provided that
redistributed derivative works do not contain misleading author or
version information.  Derivative works need not be licensed under
similar terms.
*/

/* <limits.h> supplies UINT_MAX, which the public interface uses to   */
/* choose the punycode_uint type.  It is included before the          */
/* extern "C" block is opened because C++ only permits standard       */
/* headers to be included outside of any declaration.                 */
#include <limits.h>

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/************************************************************/
/* Public interface (would normally go in its own .h file): */

/* Result codes returned by punycode_encode() and punycode_decode(). */
/* punycode_success is the first enumerator and therefore has the    */
/* value 0; every other value reports a failure.                     */
enum punycode_status {
  punycode_success,
  punycode_bad_input,   /* Input is invalid.                       */
  punycode_big_output,  /* Output would exceed the space provided. */
  punycode_overflow     /* Input needs wider integers to process.  */
};

/* punycode_uint is an unsigned integer type able to hold at least   */
/* 26 bits.  Plain unsigned int is used when it is wide enough;      */
/* otherwise unsigned long is used instead.  The shift in the #if    */
/* condition is safe even where int is 16 bits wide, because the     */
/* preprocessor evaluates #if expressions in a type at least as wide */
/* as long.                                                          */
#if UINT_MAX >= (1 << 26) - 1
typedef unsigned int punycode_uint;
#else
typedef unsigned long punycode_uint;
#endif

46 enum punycode_status punycode_encode( |
|
47 punycode_uint input_length, |
|
48 const punycode_uint input[], |
|
49 const unsigned char case_flags[], |
|
50 punycode_uint *output_length, |
|
51 char output[] ); |
|
52 |
|
53 /* punycode_encode() converts Unicode to Punycode. The input */ |
|
54 /* is represented as an array of Unicode code points (not code */ |
|
55 /* units; surrogate pairs are not allowed), and the output */ |
|
56 /* will be represented as an array of ASCII code points. The */ |
|
57 /* output string is *not* null-terminated; it will contain */ |
|
58 /* zeros if and only if the input contains zeros. (Of course */ |
|
59 /* the caller can leave room for a terminator and add one if */ |
|
60 /* needed.) The input_length is the number of code points in */ |
|
61 /* the input. The output_length is an in/out argument: the */ |
|
62 /* caller passes in the maximum number of code points that it */ |
|
63 /* can receive, and on successful return it will contain the */ |
|
64 /* number of code points actually output. The case_flags array */ |
|
65 /* holds input_length boolean values, where nonzero suggests that */ |
|
66 /* the corresponding Unicode character be forced to uppercase */ |
|
67 /* after being decoded (if possible), and zero suggests that */ |
|
68 /* it be forced to lowercase (if possible). ASCII code points */ |
|
69 /* are encoded literally, except that ASCII letters are forced */ |
|
70 /* to uppercase or lowercase according to the corresponding */ |
|
71 /* uppercase flags. If case_flags is a null pointer then ASCII */ |
|
72 /* letters are left as they are, and other code points are */ |
|
73 /* treated as if their uppercase flags were zero. The return */ |
|
74 /* value can be any of the punycode_status values defined above */ |
|
75 /* except punycode_bad_input; if not punycode_success, then */ |
|
76 /* output_size and output might contain garbage. */ |
|
77 |
|
78 enum punycode_status punycode_decode( |
|
79 punycode_uint input_length, |
|
80 const char input[], |
|
81 punycode_uint *output_length, |
|
82 punycode_uint output[], |
|
83 unsigned char case_flags[] ); |
|
84 |
|
85 /* punycode_decode() converts Punycode to Unicode. The input is */ |
|
86 /* represented as an array of ASCII code points, and the output */ |
|
87 /* will be represented as an array of Unicode code points. The */ |
|
88 /* input_length is the number of code points in the input. The */ |
|
89 /* output_length is an in/out argument: the caller passes in */ |
|
90 /* the maximum number of code points that it can receive, and */ |
|
91 /* on successful return it will contain the actual number of */ |
|
92 /* code points output. The case_flags array needs room for at */ |
|
93 /* least output_length values, or it can be a null pointer if the */ |
|
94 /* case information is not needed. A nonzero flag suggests that */ |
|
95 /* the corresponding Unicode character be forced to uppercase */ |
|
96 /* by the caller (if possible), while zero suggests that it be */ |
|
97 /* forced to lowercase (if possible). ASCII code points are */ |
|
98 /* output already in the proper case, but their flags will be set */ |
|
99 /* appropriately so that applying the flags would be harmless. */ |
|
100 /* The return value can be any of the punycode_status values */ |
|
101 /* defined above; if not punycode_success, then output_length, */ |
|
102 /* output, and case_flags might contain garbage. On success, the */ |
|
103 /* decoder will never need to write an output_length greater than */ |
|
104 /* input_length, because of how the encoding is defined. */ |
|
105 |
|
#ifdef __cplusplus
}  /* closes the extern "C" block opened at the top of the file */
#endif /* __cplusplus */