Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "seccomon.h"
6 #include "secport.h"
8 #ifdef TEST_UTF8
9 #include <assert.h>
10 #undef PORT_Assert
11 #define PORT_Assert assert
12 #endif
14 /*
15 * From RFC 2044:
16 *
17 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
18 * 0000 0000-0000 007F 0xxxxxxx
19 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
20 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
21 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
22 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
23 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
24 */
26 /*
27 * From http://www.imc.org/draft-hoffman-utf16
28 *
29 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000
30 *
31 * U' = yyyyyyyyyyxxxxxxxxxx
32 * W1 = 110110yyyyyyyyyy
33 * W2 = 110111xxxxxxxxxx
34 */
36 /*
37 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
38 * character values. If you wish to use this code for working with
39 * host byte order values, define the following:
40 *
41 * #if IS_BIG_ENDIAN
42 * #define L_0 0
43 * #define L_1 1
44 * #define L_2 2
45 * #define L_3 3
46 * #define H_0 0
47 * #define H_1 1
48 * #else / * not everyone has elif * /
49 * #if IS_LITTLE_ENDIAN
50 * #define L_0 3
51 * #define L_1 2
52 * #define L_2 1
53 * #define L_3 0
54 * #define H_0 1
55 * #define H_1 0
56 * #else
57 * #error "PDP and NUXI support deferred"
58 * #endif / * IS_LITTLE_ENDIAN * /
59 * #endif / * IS_BIG_ENDIAN * /
60 */
/* Byte positions inside a 32-bit character stored in network byte order. */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3

/* Byte positions inside a 16-bit character stored in network byte order. */
#define H_0 0
#define H_1 1

/* Sentinel returned by sec_port_read_utf8 when the input is not valid UTF-8. */
#define BAD_UTF8 ((PRUint32)-1)
71 /*
72 * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
73 * of Unicode 4.0.0.
74 *
75 * Parameters:
76 * index - Points to the byte offset in inBuf of character to read. On success,
77 * updated to the offset of the following character.
78 * inBuf - Input buffer, UTF-8 encoded
79 * inbufLen - Length of input buffer, in bytes.
80 *
81 * Returns:
82 * Success - The UCS4 encoded character
83 * Failure - BAD_UTF8
84 */
85 static PRUint32
86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
87 {
88 PRUint32 result;
89 unsigned int i = *index;
90 int bytes_left;
91 PRUint32 min_value;
93 PORT_Assert(i < inBufLen);
95 if ( (inBuf[i] & 0x80) == 0x00 ) {
96 result = inBuf[i++];
97 bytes_left = 0;
98 min_value = 0;
99 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
100 result = inBuf[i++] & 0x1F;
101 bytes_left = 1;
102 min_value = 0x80;
103 } else if ( (inBuf[i] & 0xF0) == 0xE0) {
104 result = inBuf[i++] & 0x0F;
105 bytes_left = 2;
106 min_value = 0x800;
107 } else if ( (inBuf[i] & 0xF8) == 0xF0) {
108 result = inBuf[i++] & 0x07;
109 bytes_left = 3;
110 min_value = 0x10000;
111 } else {
112 return BAD_UTF8;
113 }
115 while (bytes_left--) {
116 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
117 result = (result << 6) | (inBuf[i++] & 0x3F);
118 }
120 /* Check for overlong sequences, surrogates, and outside unicode range */
121 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
122 return BAD_UTF8;
123 }
125 *index = i;
126 return result;
127 }
129 PRBool
130 sec_port_ucs4_utf8_conversion_function
131 (
132 PRBool toUnicode,
133 unsigned char *inBuf,
134 unsigned int inBufLen,
135 unsigned char *outBuf,
136 unsigned int maxOutBufLen,
137 unsigned int *outBufLen
138 )
139 {
140 PORT_Assert((unsigned int *)NULL != outBufLen);
142 if( toUnicode ) {
143 unsigned int i, len = 0;
145 for( i = 0; i < inBufLen; ) {
146 if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
147 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
148 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
149 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
150 else return PR_FALSE;
152 len += 4;
153 }
155 if( len > maxOutBufLen ) {
156 *outBufLen = len;
157 return PR_FALSE;
158 }
160 len = 0;
162 for( i = 0; i < inBufLen; ) {
163 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
165 if (ucs4 == BAD_UTF8) return PR_FALSE;
167 outBuf[len+L_0] = 0x00;
168 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
169 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
170 outBuf[len+L_3] = (unsigned char)ucs4;
172 len += 4;
173 }
175 *outBufLen = len;
176 return PR_TRUE;
177 } else {
178 unsigned int i, len = 0;
179 PORT_Assert((inBufLen % 4) == 0);
180 if ((inBufLen % 4) != 0) {
181 *outBufLen = 0;
182 return PR_FALSE;
183 }
185 for( i = 0; i < inBufLen; i += 4 ) {
186 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
187 *outBufLen = 0;
188 return PR_FALSE;
189 } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
190 else if( inBuf[i+L_2] >= 0x08 ) len += 3;
191 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
192 else len += 1;
193 }
195 if( len > maxOutBufLen ) {
196 *outBufLen = len;
197 return PR_FALSE;
198 }
200 len = 0;
202 for( i = 0; i < inBufLen; i += 4 ) {
203 if( inBuf[i+L_1] >= 0x01 ) {
204 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
205 /* 00000000 000abcde fghijklm nopqrstu ->
206 11110abc 10defghi 10jklmno 10pqrstu */
208 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
209 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
210 | ((inBuf[i+L_2] & 0xF0) >> 4);
211 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
212 | ((inBuf[i+L_3] & 0xC0) >> 6);
213 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
215 len += 4;
216 } else if( inBuf[i+L_2] >= 0x08 ) {
217 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
218 /* 00000000 00000000 abcdefgh ijklmnop ->
219 1110abcd 10efghij 10klmnop */
221 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
222 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
223 | ((inBuf[i+L_3] & 0xC0) >> 6);
224 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
226 len += 3;
227 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
228 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
229 /* 00000000 00000000 00000abc defghijk ->
230 110abcde 10fghijk */
232 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
233 | ((inBuf[i+L_3] & 0xC0) >> 6);
234 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
236 len += 2;
237 } else {
238 /* 0000 0000-0000 007F -> 0xxxxxx */
239 /* 00000000 00000000 00000000 0abcdefg ->
240 0abcdefg */
242 outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
244 len += 1;
245 }
246 }
248 *outBufLen = len;
249 return PR_TRUE;
250 }
251 }
253 PRBool
254 sec_port_ucs2_utf8_conversion_function
255 (
256 PRBool toUnicode,
257 unsigned char *inBuf,
258 unsigned int inBufLen,
259 unsigned char *outBuf,
260 unsigned int maxOutBufLen,
261 unsigned int *outBufLen
262 )
263 {
264 PORT_Assert((unsigned int *)NULL != outBufLen);
266 if( toUnicode ) {
267 unsigned int i, len = 0;
269 for( i = 0; i < inBufLen; ) {
270 if( (inBuf[i] & 0x80) == 0x00 ) {
271 i += 1;
272 len += 2;
273 } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
274 i += 2;
275 len += 2;
276 } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
277 i += 3;
278 len += 2;
279 } else if( (inBuf[i] & 0xF8) == 0xF0 ) {
280 i += 4;
281 len += 4;
282 } else return PR_FALSE;
283 }
285 if( len > maxOutBufLen ) {
286 *outBufLen = len;
287 return PR_FALSE;
288 }
290 len = 0;
292 for( i = 0; i < inBufLen; ) {
293 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
295 if (ucs4 == BAD_UTF8) return PR_FALSE;
297 if( ucs4 < 0x10000) {
298 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
299 outBuf[len+H_1] = (unsigned char)ucs4;
300 len += 2;
301 } else {
302 ucs4 -= 0x10000;
303 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
304 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
305 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
306 outBuf[len+2+H_1] = (unsigned char)ucs4;
307 len += 4;
308 }
309 }
311 *outBufLen = len;
312 return PR_TRUE;
313 } else {
314 unsigned int i, len = 0;
315 PORT_Assert((inBufLen % 2) == 0);
316 if ((inBufLen % 2) != 0) {
317 *outBufLen = 0;
318 return PR_FALSE;
319 }
321 for( i = 0; i < inBufLen; i += 2 ) {
322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
323 else if( inBuf[i+H_0] < 0x08 ) len += 2;
324 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
325 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
326 i += 2;
327 len += 4;
328 } else {
329 return PR_FALSE;
330 }
331 }
332 else len += 3;
333 }
335 if( len > maxOutBufLen ) {
336 *outBufLen = len;
337 return PR_FALSE;
338 }
340 len = 0;
342 for( i = 0; i < inBufLen; i += 2 ) {
343 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
344 /* 0000-007F -> 0xxxxxx */
345 /* 00000000 0abcdefg -> 0abcdefg */
347 outBuf[len] = inBuf[i+H_1] & 0x7F;
349 len += 1;
350 } else if( inBuf[i+H_0] < 0x08 ) {
351 /* 0080-07FF -> 110xxxxx 10xxxxxx */
352 /* 00000abc defghijk -> 110abcde 10fghijk */
354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2)
355 | ((inBuf[i+H_1] & 0xC0) >> 6);
356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
358 len += 2;
359 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
360 int abcde, BCDE;
362 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
365 /* 110110BC DEfghijk 110111lm nopqrstu ->
366 { Let abcde = BCDE + 1 }
367 11110abc 10defghi 10jklmno 10pqrstu */
369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
370 abcde = BCDE + 1;
372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
373 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4)
374 | ((inBuf[i+0+H_1] & 0x3C) >> 2);
375 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
376 | ((inBuf[i+2+H_0] & 0x03) << 2)
377 | ((inBuf[i+2+H_1] & 0xC0) >> 6);
378 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
380 i += 2;
381 len += 4;
382 } else {
383 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
384 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
386 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
387 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2)
388 | ((inBuf[i+H_1] & 0xC0) >> 6);
389 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
391 len += 3;
392 }
393 }
395 *outBufLen = len;
396 return PR_TRUE;
397 }
398 }
400 PRBool
401 sec_port_iso88591_utf8_conversion_function
402 (
403 const unsigned char *inBuf,
404 unsigned int inBufLen,
405 unsigned char *outBuf,
406 unsigned int maxOutBufLen,
407 unsigned int *outBufLen
408 )
409 {
410 unsigned int i, len = 0;
412 PORT_Assert((unsigned int *)NULL != outBufLen);
414 for( i = 0; i < inBufLen; i++) {
415 if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
416 else len += 2;
417 }
419 if( len > maxOutBufLen ) {
420 *outBufLen = len;
421 return PR_FALSE;
422 }
424 len = 0;
426 for( i = 0; i < inBufLen; i++) {
427 if( (inBuf[i] & 0x80) == 0x00 ) {
428 /* 00-7F -> 0xxxxxxx */
429 /* 0abcdefg -> 0abcdefg */
431 outBuf[len] = inBuf[i];
432 len += 1;
433 } else {
434 /* 80-FF <- 110xxxxx 10xxxxxx */
435 /* 00000000 abcdefgh -> 110000ab 10cdefgh */
437 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
438 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
440 len += 2;
441 }
442 }
444 *outBufLen = len;
445 return PR_TRUE;
446 }
448 #ifdef TEST_UTF8
450 #include <stdio.h>
451 #include <string.h>
452 #include <stdlib.h>
453 #include <netinet/in.h> /* for htonl and htons */
455 /*
456 * UCS-4 vectors
457 */
459 struct ucs4 {
460 PRUint32 c;
461 char *utf8;
462 };
464 /*
465 * UCS-2 vectors
466 */
468 struct ucs2 {
469 PRUint16 c;
470 char *utf8;
471 };
473 /*
474 * UTF-16 vectors
475 */
477 struct utf16 {
478 PRUint32 c;
479 PRUint16 w[2];
480 };
483 /*
484 * UCS-4 vectors
485 */
487 struct ucs4 ucs4[] = {
488 { 0x00000001, "\x01" },
489 { 0x00000002, "\x02" },
490 { 0x00000003, "\x03" },
491 { 0x00000004, "\x04" },
492 { 0x00000007, "\x07" },
493 { 0x00000008, "\x08" },
494 { 0x0000000F, "\x0F" },
495 { 0x00000010, "\x10" },
496 { 0x0000001F, "\x1F" },
497 { 0x00000020, "\x20" },
498 { 0x0000003F, "\x3F" },
499 { 0x00000040, "\x40" },
500 { 0x0000007F, "\x7F" },
502 { 0x00000080, "\xC2\x80" },
503 { 0x00000081, "\xC2\x81" },
504 { 0x00000082, "\xC2\x82" },
505 { 0x00000084, "\xC2\x84" },
506 { 0x00000088, "\xC2\x88" },
507 { 0x00000090, "\xC2\x90" },
508 { 0x000000A0, "\xC2\xA0" },
509 { 0x000000C0, "\xC3\x80" },
510 { 0x000000FF, "\xC3\xBF" },
511 { 0x00000100, "\xC4\x80" },
512 { 0x00000101, "\xC4\x81" },
513 { 0x00000102, "\xC4\x82" },
514 { 0x00000104, "\xC4\x84" },
515 { 0x00000108, "\xC4\x88" },
516 { 0x00000110, "\xC4\x90" },
517 { 0x00000120, "\xC4\xA0" },
518 { 0x00000140, "\xC5\x80" },
519 { 0x00000180, "\xC6\x80" },
520 { 0x000001FF, "\xC7\xBF" },
521 { 0x00000200, "\xC8\x80" },
522 { 0x00000201, "\xC8\x81" },
523 { 0x00000202, "\xC8\x82" },
524 { 0x00000204, "\xC8\x84" },
525 { 0x00000208, "\xC8\x88" },
526 { 0x00000210, "\xC8\x90" },
527 { 0x00000220, "\xC8\xA0" },
528 { 0x00000240, "\xC9\x80" },
529 { 0x00000280, "\xCA\x80" },
530 { 0x00000300, "\xCC\x80" },
531 { 0x000003FF, "\xCF\xBF" },
532 { 0x00000400, "\xD0\x80" },
533 { 0x00000401, "\xD0\x81" },
534 { 0x00000402, "\xD0\x82" },
535 { 0x00000404, "\xD0\x84" },
536 { 0x00000408, "\xD0\x88" },
537 { 0x00000410, "\xD0\x90" },
538 { 0x00000420, "\xD0\xA0" },
539 { 0x00000440, "\xD1\x80" },
540 { 0x00000480, "\xD2\x80" },
541 { 0x00000500, "\xD4\x80" },
542 { 0x00000600, "\xD8\x80" },
543 { 0x000007FF, "\xDF\xBF" },
545 { 0x00000800, "\xE0\xA0\x80" },
546 { 0x00000801, "\xE0\xA0\x81" },
547 { 0x00000802, "\xE0\xA0\x82" },
548 { 0x00000804, "\xE0\xA0\x84" },
549 { 0x00000808, "\xE0\xA0\x88" },
550 { 0x00000810, "\xE0\xA0\x90" },
551 { 0x00000820, "\xE0\xA0\xA0" },
552 { 0x00000840, "\xE0\xA1\x80" },
553 { 0x00000880, "\xE0\xA2\x80" },
554 { 0x00000900, "\xE0\xA4\x80" },
555 { 0x00000A00, "\xE0\xA8\x80" },
556 { 0x00000C00, "\xE0\xB0\x80" },
557 { 0x00000FFF, "\xE0\xBF\xBF" },
558 { 0x00001000, "\xE1\x80\x80" },
559 { 0x00001001, "\xE1\x80\x81" },
560 { 0x00001002, "\xE1\x80\x82" },
561 { 0x00001004, "\xE1\x80\x84" },
562 { 0x00001008, "\xE1\x80\x88" },
563 { 0x00001010, "\xE1\x80\x90" },
564 { 0x00001020, "\xE1\x80\xA0" },
565 { 0x00001040, "\xE1\x81\x80" },
566 { 0x00001080, "\xE1\x82\x80" },
567 { 0x00001100, "\xE1\x84\x80" },
568 { 0x00001200, "\xE1\x88\x80" },
569 { 0x00001400, "\xE1\x90\x80" },
570 { 0x00001800, "\xE1\xA0\x80" },
571 { 0x00001FFF, "\xE1\xBF\xBF" },
572 { 0x00002000, "\xE2\x80\x80" },
573 { 0x00002001, "\xE2\x80\x81" },
574 { 0x00002002, "\xE2\x80\x82" },
575 { 0x00002004, "\xE2\x80\x84" },
576 { 0x00002008, "\xE2\x80\x88" },
577 { 0x00002010, "\xE2\x80\x90" },
578 { 0x00002020, "\xE2\x80\xA0" },
579 { 0x00002040, "\xE2\x81\x80" },
580 { 0x00002080, "\xE2\x82\x80" },
581 { 0x00002100, "\xE2\x84\x80" },
582 { 0x00002200, "\xE2\x88\x80" },
583 { 0x00002400, "\xE2\x90\x80" },
584 { 0x00002800, "\xE2\xA0\x80" },
585 { 0x00003000, "\xE3\x80\x80" },
586 { 0x00003FFF, "\xE3\xBF\xBF" },
587 { 0x00004000, "\xE4\x80\x80" },
588 { 0x00004001, "\xE4\x80\x81" },
589 { 0x00004002, "\xE4\x80\x82" },
590 { 0x00004004, "\xE4\x80\x84" },
591 { 0x00004008, "\xE4\x80\x88" },
592 { 0x00004010, "\xE4\x80\x90" },
593 { 0x00004020, "\xE4\x80\xA0" },
594 { 0x00004040, "\xE4\x81\x80" },
595 { 0x00004080, "\xE4\x82\x80" },
596 { 0x00004100, "\xE4\x84\x80" },
597 { 0x00004200, "\xE4\x88\x80" },
598 { 0x00004400, "\xE4\x90\x80" },
599 { 0x00004800, "\xE4\xA0\x80" },
600 { 0x00005000, "\xE5\x80\x80" },
601 { 0x00006000, "\xE6\x80\x80" },
602 { 0x00007FFF, "\xE7\xBF\xBF" },
603 { 0x00008000, "\xE8\x80\x80" },
604 { 0x00008001, "\xE8\x80\x81" },
605 { 0x00008002, "\xE8\x80\x82" },
606 { 0x00008004, "\xE8\x80\x84" },
607 { 0x00008008, "\xE8\x80\x88" },
608 { 0x00008010, "\xE8\x80\x90" },
609 { 0x00008020, "\xE8\x80\xA0" },
610 { 0x00008040, "\xE8\x81\x80" },
611 { 0x00008080, "\xE8\x82\x80" },
612 { 0x00008100, "\xE8\x84\x80" },
613 { 0x00008200, "\xE8\x88\x80" },
614 { 0x00008400, "\xE8\x90\x80" },
615 { 0x00008800, "\xE8\xA0\x80" },
616 { 0x00009000, "\xE9\x80\x80" },
617 { 0x0000A000, "\xEA\x80\x80" },
618 { 0x0000C000, "\xEC\x80\x80" },
619 { 0x0000FFFF, "\xEF\xBF\xBF" },
621 { 0x00010000, "\xF0\x90\x80\x80" },
622 { 0x00010001, "\xF0\x90\x80\x81" },
623 { 0x00010002, "\xF0\x90\x80\x82" },
624 { 0x00010004, "\xF0\x90\x80\x84" },
625 { 0x00010008, "\xF0\x90\x80\x88" },
626 { 0x00010010, "\xF0\x90\x80\x90" },
627 { 0x00010020, "\xF0\x90\x80\xA0" },
628 { 0x00010040, "\xF0\x90\x81\x80" },
629 { 0x00010080, "\xF0\x90\x82\x80" },
630 { 0x00010100, "\xF0\x90\x84\x80" },
631 { 0x00010200, "\xF0\x90\x88\x80" },
632 { 0x00010400, "\xF0\x90\x90\x80" },
633 { 0x00010800, "\xF0\x90\xA0\x80" },
634 { 0x00011000, "\xF0\x91\x80\x80" },
635 { 0x00012000, "\xF0\x92\x80\x80" },
636 { 0x00014000, "\xF0\x94\x80\x80" },
637 { 0x00018000, "\xF0\x98\x80\x80" },
638 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
639 { 0x00020000, "\xF0\xA0\x80\x80" },
640 { 0x00020001, "\xF0\xA0\x80\x81" },
641 { 0x00020002, "\xF0\xA0\x80\x82" },
642 { 0x00020004, "\xF0\xA0\x80\x84" },
643 { 0x00020008, "\xF0\xA0\x80\x88" },
644 { 0x00020010, "\xF0\xA0\x80\x90" },
645 { 0x00020020, "\xF0\xA0\x80\xA0" },
646 { 0x00020040, "\xF0\xA0\x81\x80" },
647 { 0x00020080, "\xF0\xA0\x82\x80" },
648 { 0x00020100, "\xF0\xA0\x84\x80" },
649 { 0x00020200, "\xF0\xA0\x88\x80" },
650 { 0x00020400, "\xF0\xA0\x90\x80" },
651 { 0x00020800, "\xF0\xA0\xA0\x80" },
652 { 0x00021000, "\xF0\xA1\x80\x80" },
653 { 0x00022000, "\xF0\xA2\x80\x80" },
654 { 0x00024000, "\xF0\xA4\x80\x80" },
655 { 0x00028000, "\xF0\xA8\x80\x80" },
656 { 0x00030000, "\xF0\xB0\x80\x80" },
657 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
658 { 0x00040000, "\xF1\x80\x80\x80" },
659 { 0x00040001, "\xF1\x80\x80\x81" },
660 { 0x00040002, "\xF1\x80\x80\x82" },
661 { 0x00040004, "\xF1\x80\x80\x84" },
662 { 0x00040008, "\xF1\x80\x80\x88" },
663 { 0x00040010, "\xF1\x80\x80\x90" },
664 { 0x00040020, "\xF1\x80\x80\xA0" },
665 { 0x00040040, "\xF1\x80\x81\x80" },
666 { 0x00040080, "\xF1\x80\x82\x80" },
667 { 0x00040100, "\xF1\x80\x84\x80" },
668 { 0x00040200, "\xF1\x80\x88\x80" },
669 { 0x00040400, "\xF1\x80\x90\x80" },
670 { 0x00040800, "\xF1\x80\xA0\x80" },
671 { 0x00041000, "\xF1\x81\x80\x80" },
672 { 0x00042000, "\xF1\x82\x80\x80" },
673 { 0x00044000, "\xF1\x84\x80\x80" },
674 { 0x00048000, "\xF1\x88\x80\x80" },
675 { 0x00050000, "\xF1\x90\x80\x80" },
676 { 0x00060000, "\xF1\xA0\x80\x80" },
677 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
678 { 0x00080000, "\xF2\x80\x80\x80" },
679 { 0x00080001, "\xF2\x80\x80\x81" },
680 { 0x00080002, "\xF2\x80\x80\x82" },
681 { 0x00080004, "\xF2\x80\x80\x84" },
682 { 0x00080008, "\xF2\x80\x80\x88" },
683 { 0x00080010, "\xF2\x80\x80\x90" },
684 { 0x00080020, "\xF2\x80\x80\xA0" },
685 { 0x00080040, "\xF2\x80\x81\x80" },
686 { 0x00080080, "\xF2\x80\x82\x80" },
687 { 0x00080100, "\xF2\x80\x84\x80" },
688 { 0x00080200, "\xF2\x80\x88\x80" },
689 { 0x00080400, "\xF2\x80\x90\x80" },
690 { 0x00080800, "\xF2\x80\xA0\x80" },
691 { 0x00081000, "\xF2\x81\x80\x80" },
692 { 0x00082000, "\xF2\x82\x80\x80" },
693 { 0x00084000, "\xF2\x84\x80\x80" },
694 { 0x00088000, "\xF2\x88\x80\x80" },
695 { 0x00090000, "\xF2\x90\x80\x80" },
696 { 0x000A0000, "\xF2\xA0\x80\x80" },
697 { 0x000C0000, "\xF3\x80\x80\x80" },
698 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
699 { 0x00100000, "\xF4\x80\x80\x80" },
700 { 0x00100001, "\xF4\x80\x80\x81" },
701 { 0x00100002, "\xF4\x80\x80\x82" },
702 { 0x00100004, "\xF4\x80\x80\x84" },
703 { 0x00100008, "\xF4\x80\x80\x88" },
704 { 0x00100010, "\xF4\x80\x80\x90" },
705 { 0x00100020, "\xF4\x80\x80\xA0" },
706 { 0x00100040, "\xF4\x80\x81\x80" },
707 { 0x00100080, "\xF4\x80\x82\x80" },
708 { 0x00100100, "\xF4\x80\x84\x80" },
709 { 0x00100200, "\xF4\x80\x88\x80" },
710 { 0x00100400, "\xF4\x80\x90\x80" },
711 { 0x00100800, "\xF4\x80\xA0\x80" },
712 { 0x00101000, "\xF4\x81\x80\x80" },
713 { 0x00102000, "\xF4\x82\x80\x80" },
714 { 0x00104000, "\xF4\x84\x80\x80" },
715 { 0x00108000, "\xF4\x88\x80\x80" },
716 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
717 };
719 /*
720 * UCS-2 vectors
721 */
723 struct ucs2 ucs2[] = {
724 { 0x0001, "\x01" },
725 { 0x0002, "\x02" },
726 { 0x0003, "\x03" },
727 { 0x0004, "\x04" },
728 { 0x0007, "\x07" },
729 { 0x0008, "\x08" },
730 { 0x000F, "\x0F" },
731 { 0x0010, "\x10" },
732 { 0x001F, "\x1F" },
733 { 0x0020, "\x20" },
734 { 0x003F, "\x3F" },
735 { 0x0040, "\x40" },
736 { 0x007F, "\x7F" },
738 { 0x0080, "\xC2\x80" },
739 { 0x0081, "\xC2\x81" },
740 { 0x0082, "\xC2\x82" },
741 { 0x0084, "\xC2\x84" },
742 { 0x0088, "\xC2\x88" },
743 { 0x0090, "\xC2\x90" },
744 { 0x00A0, "\xC2\xA0" },
745 { 0x00C0, "\xC3\x80" },
746 { 0x00FF, "\xC3\xBF" },
747 { 0x0100, "\xC4\x80" },
748 { 0x0101, "\xC4\x81" },
749 { 0x0102, "\xC4\x82" },
750 { 0x0104, "\xC4\x84" },
751 { 0x0108, "\xC4\x88" },
752 { 0x0110, "\xC4\x90" },
753 { 0x0120, "\xC4\xA0" },
754 { 0x0140, "\xC5\x80" },
755 { 0x0180, "\xC6\x80" },
756 { 0x01FF, "\xC7\xBF" },
757 { 0x0200, "\xC8\x80" },
758 { 0x0201, "\xC8\x81" },
759 { 0x0202, "\xC8\x82" },
760 { 0x0204, "\xC8\x84" },
761 { 0x0208, "\xC8\x88" },
762 { 0x0210, "\xC8\x90" },
763 { 0x0220, "\xC8\xA0" },
764 { 0x0240, "\xC9\x80" },
765 { 0x0280, "\xCA\x80" },
766 { 0x0300, "\xCC\x80" },
767 { 0x03FF, "\xCF\xBF" },
768 { 0x0400, "\xD0\x80" },
769 { 0x0401, "\xD0\x81" },
770 { 0x0402, "\xD0\x82" },
771 { 0x0404, "\xD0\x84" },
772 { 0x0408, "\xD0\x88" },
773 { 0x0410, "\xD0\x90" },
774 { 0x0420, "\xD0\xA0" },
775 { 0x0440, "\xD1\x80" },
776 { 0x0480, "\xD2\x80" },
777 { 0x0500, "\xD4\x80" },
778 { 0x0600, "\xD8\x80" },
779 { 0x07FF, "\xDF\xBF" },
781 { 0x0800, "\xE0\xA0\x80" },
782 { 0x0801, "\xE0\xA0\x81" },
783 { 0x0802, "\xE0\xA0\x82" },
784 { 0x0804, "\xE0\xA0\x84" },
785 { 0x0808, "\xE0\xA0\x88" },
786 { 0x0810, "\xE0\xA0\x90" },
787 { 0x0820, "\xE0\xA0\xA0" },
788 { 0x0840, "\xE0\xA1\x80" },
789 { 0x0880, "\xE0\xA2\x80" },
790 { 0x0900, "\xE0\xA4\x80" },
791 { 0x0A00, "\xE0\xA8\x80" },
792 { 0x0C00, "\xE0\xB0\x80" },
793 { 0x0FFF, "\xE0\xBF\xBF" },
794 { 0x1000, "\xE1\x80\x80" },
795 { 0x1001, "\xE1\x80\x81" },
796 { 0x1002, "\xE1\x80\x82" },
797 { 0x1004, "\xE1\x80\x84" },
798 { 0x1008, "\xE1\x80\x88" },
799 { 0x1010, "\xE1\x80\x90" },
800 { 0x1020, "\xE1\x80\xA0" },
801 { 0x1040, "\xE1\x81\x80" },
802 { 0x1080, "\xE1\x82\x80" },
803 { 0x1100, "\xE1\x84\x80" },
804 { 0x1200, "\xE1\x88\x80" },
805 { 0x1400, "\xE1\x90\x80" },
806 { 0x1800, "\xE1\xA0\x80" },
807 { 0x1FFF, "\xE1\xBF\xBF" },
808 { 0x2000, "\xE2\x80\x80" },
809 { 0x2001, "\xE2\x80\x81" },
810 { 0x2002, "\xE2\x80\x82" },
811 { 0x2004, "\xE2\x80\x84" },
812 { 0x2008, "\xE2\x80\x88" },
813 { 0x2010, "\xE2\x80\x90" },
814 { 0x2020, "\xE2\x80\xA0" },
815 { 0x2040, "\xE2\x81\x80" },
816 { 0x2080, "\xE2\x82\x80" },
817 { 0x2100, "\xE2\x84\x80" },
818 { 0x2200, "\xE2\x88\x80" },
819 { 0x2400, "\xE2\x90\x80" },
820 { 0x2800, "\xE2\xA0\x80" },
821 { 0x3000, "\xE3\x80\x80" },
822 { 0x3FFF, "\xE3\xBF\xBF" },
823 { 0x4000, "\xE4\x80\x80" },
824 { 0x4001, "\xE4\x80\x81" },
825 { 0x4002, "\xE4\x80\x82" },
826 { 0x4004, "\xE4\x80\x84" },
827 { 0x4008, "\xE4\x80\x88" },
828 { 0x4010, "\xE4\x80\x90" },
829 { 0x4020, "\xE4\x80\xA0" },
830 { 0x4040, "\xE4\x81\x80" },
831 { 0x4080, "\xE4\x82\x80" },
832 { 0x4100, "\xE4\x84\x80" },
833 { 0x4200, "\xE4\x88\x80" },
834 { 0x4400, "\xE4\x90\x80" },
835 { 0x4800, "\xE4\xA0\x80" },
836 { 0x5000, "\xE5\x80\x80" },
837 { 0x6000, "\xE6\x80\x80" },
838 { 0x7FFF, "\xE7\xBF\xBF" },
839 { 0x8000, "\xE8\x80\x80" },
840 { 0x8001, "\xE8\x80\x81" },
841 { 0x8002, "\xE8\x80\x82" },
842 { 0x8004, "\xE8\x80\x84" },
843 { 0x8008, "\xE8\x80\x88" },
844 { 0x8010, "\xE8\x80\x90" },
845 { 0x8020, "\xE8\x80\xA0" },
846 { 0x8040, "\xE8\x81\x80" },
847 { 0x8080, "\xE8\x82\x80" },
848 { 0x8100, "\xE8\x84\x80" },
849 { 0x8200, "\xE8\x88\x80" },
850 { 0x8400, "\xE8\x90\x80" },
851 { 0x8800, "\xE8\xA0\x80" },
852 { 0x9000, "\xE9\x80\x80" },
853 { 0xA000, "\xEA\x80\x80" },
854 { 0xC000, "\xEC\x80\x80" },
855 { 0xFFFF, "\xEF\xBF\xBF" }
857 };
859 /*
860 * UTF-16 vectors
861 */
863 struct utf16 utf16[] = {
864 { 0x00010000, { 0xD800, 0xDC00 } },
865 { 0x00010001, { 0xD800, 0xDC01 } },
866 { 0x00010002, { 0xD800, 0xDC02 } },
867 { 0x00010003, { 0xD800, 0xDC03 } },
868 { 0x00010004, { 0xD800, 0xDC04 } },
869 { 0x00010007, { 0xD800, 0xDC07 } },
870 { 0x00010008, { 0xD800, 0xDC08 } },
871 { 0x0001000F, { 0xD800, 0xDC0F } },
872 { 0x00010010, { 0xD800, 0xDC10 } },
873 { 0x0001001F, { 0xD800, 0xDC1F } },
874 { 0x00010020, { 0xD800, 0xDC20 } },
875 { 0x0001003F, { 0xD800, 0xDC3F } },
876 { 0x00010040, { 0xD800, 0xDC40 } },
877 { 0x0001007F, { 0xD800, 0xDC7F } },
878 { 0x00010080, { 0xD800, 0xDC80 } },
879 { 0x00010081, { 0xD800, 0xDC81 } },
880 { 0x00010082, { 0xD800, 0xDC82 } },
881 { 0x00010084, { 0xD800, 0xDC84 } },
882 { 0x00010088, { 0xD800, 0xDC88 } },
883 { 0x00010090, { 0xD800, 0xDC90 } },
884 { 0x000100A0, { 0xD800, 0xDCA0 } },
885 { 0x000100C0, { 0xD800, 0xDCC0 } },
886 { 0x000100FF, { 0xD800, 0xDCFF } },
887 { 0x00010100, { 0xD800, 0xDD00 } },
888 { 0x00010101, { 0xD800, 0xDD01 } },
889 { 0x00010102, { 0xD800, 0xDD02 } },
890 { 0x00010104, { 0xD800, 0xDD04 } },
891 { 0x00010108, { 0xD800, 0xDD08 } },
892 { 0x00010110, { 0xD800, 0xDD10 } },
893 { 0x00010120, { 0xD800, 0xDD20 } },
894 { 0x00010140, { 0xD800, 0xDD40 } },
895 { 0x00010180, { 0xD800, 0xDD80 } },
896 { 0x000101FF, { 0xD800, 0xDDFF } },
897 { 0x00010200, { 0xD800, 0xDE00 } },
898 { 0x00010201, { 0xD800, 0xDE01 } },
899 { 0x00010202, { 0xD800, 0xDE02 } },
900 { 0x00010204, { 0xD800, 0xDE04 } },
901 { 0x00010208, { 0xD800, 0xDE08 } },
902 { 0x00010210, { 0xD800, 0xDE10 } },
903 { 0x00010220, { 0xD800, 0xDE20 } },
904 { 0x00010240, { 0xD800, 0xDE40 } },
905 { 0x00010280, { 0xD800, 0xDE80 } },
906 { 0x00010300, { 0xD800, 0xDF00 } },
907 { 0x000103FF, { 0xD800, 0xDFFF } },
908 { 0x00010400, { 0xD801, 0xDC00 } },
909 { 0x00010401, { 0xD801, 0xDC01 } },
910 { 0x00010402, { 0xD801, 0xDC02 } },
911 { 0x00010404, { 0xD801, 0xDC04 } },
912 { 0x00010408, { 0xD801, 0xDC08 } },
913 { 0x00010410, { 0xD801, 0xDC10 } },
914 { 0x00010420, { 0xD801, 0xDC20 } },
915 { 0x00010440, { 0xD801, 0xDC40 } },
916 { 0x00010480, { 0xD801, 0xDC80 } },
917 { 0x00010500, { 0xD801, 0xDD00 } },
918 { 0x00010600, { 0xD801, 0xDE00 } },
919 { 0x000107FF, { 0xD801, 0xDFFF } },
920 { 0x00010800, { 0xD802, 0xDC00 } },
921 { 0x00010801, { 0xD802, 0xDC01 } },
922 { 0x00010802, { 0xD802, 0xDC02 } },
923 { 0x00010804, { 0xD802, 0xDC04 } },
924 { 0x00010808, { 0xD802, 0xDC08 } },
925 { 0x00010810, { 0xD802, 0xDC10 } },
926 { 0x00010820, { 0xD802, 0xDC20 } },
927 { 0x00010840, { 0xD802, 0xDC40 } },
928 { 0x00010880, { 0xD802, 0xDC80 } },
929 { 0x00010900, { 0xD802, 0xDD00 } },
930 { 0x00010A00, { 0xD802, 0xDE00 } },
931 { 0x00010C00, { 0xD803, 0xDC00 } },
932 { 0x00010FFF, { 0xD803, 0xDFFF } },
933 { 0x00011000, { 0xD804, 0xDC00 } },
934 { 0x00011001, { 0xD804, 0xDC01 } },
935 { 0x00011002, { 0xD804, 0xDC02 } },
936 { 0x00011004, { 0xD804, 0xDC04 } },
937 { 0x00011008, { 0xD804, 0xDC08 } },
938 { 0x00011010, { 0xD804, 0xDC10 } },
939 { 0x00011020, { 0xD804, 0xDC20 } },
940 { 0x00011040, { 0xD804, 0xDC40 } },
941 { 0x00011080, { 0xD804, 0xDC80 } },
942 { 0x00011100, { 0xD804, 0xDD00 } },
943 { 0x00011200, { 0xD804, 0xDE00 } },
944 { 0x00011400, { 0xD805, 0xDC00 } },
945 { 0x00011800, { 0xD806, 0xDC00 } },
946 { 0x00011FFF, { 0xD807, 0xDFFF } },
947 { 0x00012000, { 0xD808, 0xDC00 } },
948 { 0x00012001, { 0xD808, 0xDC01 } },
949 { 0x00012002, { 0xD808, 0xDC02 } },
950 { 0x00012004, { 0xD808, 0xDC04 } },
951 { 0x00012008, { 0xD808, 0xDC08 } },
952 { 0x00012010, { 0xD808, 0xDC10 } },
953 { 0x00012020, { 0xD808, 0xDC20 } },
954 { 0x00012040, { 0xD808, 0xDC40 } },
955 { 0x00012080, { 0xD808, 0xDC80 } },
956 { 0x00012100, { 0xD808, 0xDD00 } },
957 { 0x00012200, { 0xD808, 0xDE00 } },
958 { 0x00012400, { 0xD809, 0xDC00 } },
959 { 0x00012800, { 0xD80A, 0xDC00 } },
960 { 0x00013000, { 0xD80C, 0xDC00 } },
961 { 0x00013FFF, { 0xD80F, 0xDFFF } },
962 { 0x00014000, { 0xD810, 0xDC00 } },
963 { 0x00014001, { 0xD810, 0xDC01 } },
964 { 0x00014002, { 0xD810, 0xDC02 } },
965 { 0x00014004, { 0xD810, 0xDC04 } },
966 { 0x00014008, { 0xD810, 0xDC08 } },
967 { 0x00014010, { 0xD810, 0xDC10 } },
968 { 0x00014020, { 0xD810, 0xDC20 } },
969 { 0x00014040, { 0xD810, 0xDC40 } },
970 { 0x00014080, { 0xD810, 0xDC80 } },
971 { 0x00014100, { 0xD810, 0xDD00 } },
972 { 0x00014200, { 0xD810, 0xDE00 } },
973 { 0x00014400, { 0xD811, 0xDC00 } },
974 { 0x00014800, { 0xD812, 0xDC00 } },
975 { 0x00015000, { 0xD814, 0xDC00 } },
976 { 0x00016000, { 0xD818, 0xDC00 } },
977 { 0x00017FFF, { 0xD81F, 0xDFFF } },
978 { 0x00018000, { 0xD820, 0xDC00 } },
979 { 0x00018001, { 0xD820, 0xDC01 } },
980 { 0x00018002, { 0xD820, 0xDC02 } },
981 { 0x00018004, { 0xD820, 0xDC04 } },
982 { 0x00018008, { 0xD820, 0xDC08 } },
983 { 0x00018010, { 0xD820, 0xDC10 } },
984 { 0x00018020, { 0xD820, 0xDC20 } },
985 { 0x00018040, { 0xD820, 0xDC40 } },
986 { 0x00018080, { 0xD820, 0xDC80 } },
987 { 0x00018100, { 0xD820, 0xDD00 } },
988 { 0x00018200, { 0xD820, 0xDE00 } },
989 { 0x00018400, { 0xD821, 0xDC00 } },
990 { 0x00018800, { 0xD822, 0xDC00 } },
991 { 0x00019000, { 0xD824, 0xDC00 } },
992 { 0x0001A000, { 0xD828, 0xDC00 } },
993 { 0x0001C000, { 0xD830, 0xDC00 } },
994 { 0x0001FFFF, { 0xD83F, 0xDFFF } },
995 { 0x00020000, { 0xD840, 0xDC00 } },
996 { 0x00020001, { 0xD840, 0xDC01 } },
997 { 0x00020002, { 0xD840, 0xDC02 } },
998 { 0x00020004, { 0xD840, 0xDC04 } },
999 { 0x00020008, { 0xD840, 0xDC08 } },
1000 { 0x00020010, { 0xD840, 0xDC10 } },
1001 { 0x00020020, { 0xD840, 0xDC20 } },
1002 { 0x00020040, { 0xD840, 0xDC40 } },
1003 { 0x00020080, { 0xD840, 0xDC80 } },
1004 { 0x00020100, { 0xD840, 0xDD00 } },
1005 { 0x00020200, { 0xD840, 0xDE00 } },
1006 { 0x00020400, { 0xD841, 0xDC00 } },
1007 { 0x00020800, { 0xD842, 0xDC00 } },
1008 { 0x00021000, { 0xD844, 0xDC00 } },
1009 { 0x00022000, { 0xD848, 0xDC00 } },
1010 { 0x00024000, { 0xD850, 0xDC00 } },
1011 { 0x00028000, { 0xD860, 0xDC00 } },
1012 { 0x0002FFFF, { 0xD87F, 0xDFFF } },
1013 { 0x00030000, { 0xD880, 0xDC00 } },
1014 { 0x00030001, { 0xD880, 0xDC01 } },
1015 { 0x00030002, { 0xD880, 0xDC02 } },
1016 { 0x00030004, { 0xD880, 0xDC04 } },
1017 { 0x00030008, { 0xD880, 0xDC08 } },
1018 { 0x00030010, { 0xD880, 0xDC10 } },
1019 { 0x00030020, { 0xD880, 0xDC20 } },
1020 { 0x00030040, { 0xD880, 0xDC40 } },
1021 { 0x00030080, { 0xD880, 0xDC80 } },
1022 { 0x00030100, { 0xD880, 0xDD00 } },
1023 { 0x00030200, { 0xD880, 0xDE00 } },
1024 { 0x00030400, { 0xD881, 0xDC00 } },
1025 { 0x00030800, { 0xD882, 0xDC00 } },
1026 { 0x00031000, { 0xD884, 0xDC00 } },
1027 { 0x00032000, { 0xD888, 0xDC00 } },
1028 { 0x00034000, { 0xD890, 0xDC00 } },
1029 { 0x00038000, { 0xD8A0, 0xDC00 } },
1030 { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
1031 { 0x00040000, { 0xD8C0, 0xDC00 } },
1032 { 0x00040001, { 0xD8C0, 0xDC01 } },
1033 { 0x00040002, { 0xD8C0, 0xDC02 } },
1034 { 0x00040004, { 0xD8C0, 0xDC04 } },
1035 { 0x00040008, { 0xD8C0, 0xDC08 } },
1036 { 0x00040010, { 0xD8C0, 0xDC10 } },
1037 { 0x00040020, { 0xD8C0, 0xDC20 } },
1038 { 0x00040040, { 0xD8C0, 0xDC40 } },
1039 { 0x00040080, { 0xD8C0, 0xDC80 } },
1040 { 0x00040100, { 0xD8C0, 0xDD00 } },
1041 { 0x00040200, { 0xD8C0, 0xDE00 } },
1042 { 0x00040400, { 0xD8C1, 0xDC00 } },
1043 { 0x00040800, { 0xD8C2, 0xDC00 } },
1044 { 0x00041000, { 0xD8C4, 0xDC00 } },
1045 { 0x00042000, { 0xD8C8, 0xDC00 } },
1046 { 0x00044000, { 0xD8D0, 0xDC00 } },
1047 { 0x00048000, { 0xD8E0, 0xDC00 } },
1048 { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
1049 { 0x00050000, { 0xD900, 0xDC00 } },
1050 { 0x00050001, { 0xD900, 0xDC01 } },
1051 { 0x00050002, { 0xD900, 0xDC02 } },
1052 { 0x00050004, { 0xD900, 0xDC04 } },
1053 { 0x00050008, { 0xD900, 0xDC08 } },
1054 { 0x00050010, { 0xD900, 0xDC10 } },
1055 { 0x00050020, { 0xD900, 0xDC20 } },
1056 { 0x00050040, { 0xD900, 0xDC40 } },
1057 { 0x00050080, { 0xD900, 0xDC80 } },
1058 { 0x00050100, { 0xD900, 0xDD00 } },
1059 { 0x00050200, { 0xD900, 0xDE00 } },
1060 { 0x00050400, { 0xD901, 0xDC00 } },
1061 { 0x00050800, { 0xD902, 0xDC00 } },
1062 { 0x00051000, { 0xD904, 0xDC00 } },
1063 { 0x00052000, { 0xD908, 0xDC00 } },
1064 { 0x00054000, { 0xD910, 0xDC00 } },
1065 { 0x00058000, { 0xD920, 0xDC00 } },
1066 { 0x00060000, { 0xD940, 0xDC00 } },
1067 { 0x00070000, { 0xD980, 0xDC00 } },
1068 { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
1069 { 0x00080000, { 0xD9C0, 0xDC00 } },
1070 { 0x00080001, { 0xD9C0, 0xDC01 } },
1071 { 0x00080002, { 0xD9C0, 0xDC02 } },
1072 { 0x00080004, { 0xD9C0, 0xDC04 } },
1073 { 0x00080008, { 0xD9C0, 0xDC08 } },
1074 { 0x00080010, { 0xD9C0, 0xDC10 } },
1075 { 0x00080020, { 0xD9C0, 0xDC20 } },
1076 { 0x00080040, { 0xD9C0, 0xDC40 } },
1077 { 0x00080080, { 0xD9C0, 0xDC80 } },
1078 { 0x00080100, { 0xD9C0, 0xDD00 } },
1079 { 0x00080200, { 0xD9C0, 0xDE00 } },
1080 { 0x00080400, { 0xD9C1, 0xDC00 } },
1081 { 0x00080800, { 0xD9C2, 0xDC00 } },
1082 { 0x00081000, { 0xD9C4, 0xDC00 } },
1083 { 0x00082000, { 0xD9C8, 0xDC00 } },
1084 { 0x00084000, { 0xD9D0, 0xDC00 } },
1085 { 0x00088000, { 0xD9E0, 0xDC00 } },
1086 { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
1087 { 0x00090000, { 0xDA00, 0xDC00 } },
1088 { 0x00090001, { 0xDA00, 0xDC01 } },
1089 { 0x00090002, { 0xDA00, 0xDC02 } },
1090 { 0x00090004, { 0xDA00, 0xDC04 } },
1091 { 0x00090008, { 0xDA00, 0xDC08 } },
1092 { 0x00090010, { 0xDA00, 0xDC10 } },
1093 { 0x00090020, { 0xDA00, 0xDC20 } },
1094 { 0x00090040, { 0xDA00, 0xDC40 } },
1095 { 0x00090080, { 0xDA00, 0xDC80 } },
1096 { 0x00090100, { 0xDA00, 0xDD00 } },
1097 { 0x00090200, { 0xDA00, 0xDE00 } },
1098 { 0x00090400, { 0xDA01, 0xDC00 } },
1099 { 0x00090800, { 0xDA02, 0xDC00 } },
1100 { 0x00091000, { 0xDA04, 0xDC00 } },
1101 { 0x00092000, { 0xDA08, 0xDC00 } },
1102 { 0x00094000, { 0xDA10, 0xDC00 } },
1103 { 0x00098000, { 0xDA20, 0xDC00 } },
1104 { 0x000A0000, { 0xDA40, 0xDC00 } },
1105 { 0x000B0000, { 0xDA80, 0xDC00 } },
1106 { 0x000C0000, { 0xDAC0, 0xDC00 } },
1107 { 0x000D0000, { 0xDB00, 0xDC00 } },
1108 { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
1109 { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
1111 };
/*
 * Byte strings that are NOT well-formed UTF-8.  test_utf8_bad_chars()
 * feeds each entry to both the UCS-2 and the UCS-4 decoder and expects
 * the conversion to be refused.
 */
char *utf8_bad[] = {
    /* overlong two-, three- and four-byte encodings */
    "\xC0\x80", "\xC1\xBF",
    "\xE0\x80\x80", "\xE0\x9F\xBF",
    "\xF0\x80\x80\x80", "\xF0\x8F\xBF\xBF",

    /* four-byte forms that decode to values above U+10FFFF */
    "\xF4\x90\x80\x80", "\xF7\xBF\xBF\xBF",

    /* five-byte forms (lead bytes F8..FB) */
    "\xF8\x80\x80\x80\x80", "\xF8\x88\x80\x80\x80",
    "\xF8\x92\x80\x80\x80", "\xF8\x9F\xBF\xBF\xBF",
    "\xF8\xA0\x80\x80\x80", "\xF8\xA8\x80\x80\x80",
    "\xF8\xB0\x80\x80\x80", "\xF8\xBF\xBF\xBF\xBF",
    "\xF9\x80\x80\x80\x88", "\xF9\x84\x80\x80\x80",
    "\xF9\xBF\xBF\xBF\xBF",
    "\xFA\x80\x80\x80\x80", "\xFA\x90\x80\x80\x80",
    "\xFB\xBF\xBF\xBF\xBF",

    /* six-byte forms (lead bytes FC..FD) */
    "\xFC\x84\x80\x80\x80\x81", "\xFC\x85\x80\x80\x80\x80",
    "\xFC\x86\x80\x80\x80\x80", "\xFC\x87\xBF\xBF\xBF\xBF",
    "\xFC\x88\xA0\x80\x80\x80", "\xFC\x89\x80\x80\x80\x80",
    "\xFC\x8A\x80\x80\x80\x80", "\xFC\x90\x80\x80\x80\x82",
    "\xFD\x80\x80\x80\x80\x80", "\xFD\xBF\xBF\xBF\xBF\xBF",

    /* stray continuation byte, truncated sequence, lead byte after lead byte */
    "\x80",
    "\xC3",
    "\xC3\xC3\x80",

    /* three-byte encodings of surrogate code points (U+D800..U+DFFF) */
    "\xED\xA0\x80",
    "\xED\xBF\x80",
    "\xED\xBF\xBF",
    "\xED\xA0\x80\xE0\xBF\xBF",
};
/*
 * Print a labelled hex dump of a zero-terminated byte string on stdout.
 *
 *   word  label printed first, followed by one space
 *   utf8  bytes to dump; printing stops at (and excludes) the first
 *         zero byte, each byte shown as two hex digits plus a space
 *   end   text printed after the last byte (typically a separator or
 *         a newline)
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
    unsigned char *cursor = utf8;

    fprintf(stdout, "%s ", word);

    while (*cursor != 0) {
        fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
        cursor++;
    }

    fprintf(stdout, "%s", end);
}
1171 static PRBool
1172 test_ucs4_chars
1173 (
1174 void
1175 )
1176 {
1177 PRBool rv = PR_TRUE;
1178 int i;
1180 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1181 struct ucs4 *e = &ucs4[i];
1182 PRBool result;
1183 unsigned char utf8[8];
1184 unsigned int len = 0;
1185 PRUint32 back = 0;
1187 (void)memset(utf8, 0, sizeof(utf8));
1189 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1190 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1192 if( !result ) {
1193 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
1194 rv = PR_FALSE;
1195 continue;
1196 }
1198 if( (len >= sizeof(utf8)) ||
1199 (strlen(e->utf8) != len) ||
1200 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1201 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
1202 dump_utf8("expected", e->utf8, ", ");
1203 dump_utf8("received", utf8, "\n");
1204 rv = PR_FALSE;
1205 continue;
1206 }
1208 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1209 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1211 if( !result ) {
1212 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
1213 rv = PR_FALSE;
1214 continue;
1215 }
1217 if( (sizeof(back) != len) || (e->c != back) ) {
1218 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
1219 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1220 rv = PR_FALSE;
1221 continue;
1222 }
1223 }
1225 return rv;
1226 }
1228 static PRBool
1229 test_ucs2_chars
1230 (
1231 void
1232 )
1233 {
1234 PRBool rv = PR_TRUE;
1235 int i;
1237 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1238 struct ucs2 *e = &ucs2[i];
1239 PRBool result;
1240 unsigned char utf8[8];
1241 unsigned int len = 0;
1242 PRUint16 back = 0;
1244 (void)memset(utf8, 0, sizeof(utf8));
1246 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1247 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1249 if( !result ) {
1250 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
1251 rv = PR_FALSE;
1252 continue;
1253 }
1255 if( (len >= sizeof(utf8)) ||
1256 (strlen(e->utf8) != len) ||
1257 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1258 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
1259 dump_utf8("expected", e->utf8, ", ");
1260 dump_utf8("received", utf8, "\n");
1261 rv = PR_FALSE;
1262 continue;
1263 }
1265 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1266 utf8, len, (unsigned char *)&back, sizeof(back), &len);
1268 if( !result ) {
1269 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
1270 rv = PR_FALSE;
1271 continue;
1272 }
1274 if( (sizeof(back) != len) || (e->c != back) ) {
1275 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
1276 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
1277 rv = PR_FALSE;
1278 continue;
1279 }
1280 }
1282 return rv;
1283 }
1285 static PRBool
1286 test_utf16_chars
1287 (
1288 void
1289 )
1290 {
1291 PRBool rv = PR_TRUE;
1292 int i;
1294 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1295 struct utf16 *e = &utf16[i];
1296 PRBool result;
1297 unsigned char utf8[8];
1298 unsigned int len = 0;
1299 PRUint32 back32 = 0;
1300 PRUint16 back[2];
1302 (void)memset(utf8, 0, sizeof(utf8));
1304 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1305 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
1307 if( !result ) {
1308 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n",
1309 e->w[0], e->w[1]);
1310 rv = PR_FALSE;
1311 continue;
1312 }
1314 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1315 utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
1317 if( 4 != len ) {
1318 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
1319 "unexpected len %d\n", e->w[0], e->w[1], len);
1320 rv = PR_FALSE;
1321 continue;
1322 }
1324 utf8[len] = '\0'; /* null-terminate for printing */
1326 if( !result ) {
1327 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
1328 rv = PR_FALSE;
1329 continue;
1330 }
1332 if( (sizeof(back32) != len) || (e->c != back32) ) {
1333 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ",
1334 e->w[0], e->w[1]);
1335 dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
1336 if( sizeof(back32) != len ) {
1337 fprintf(stdout, "len is %d\n", len);
1338 } else {
1339 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
1340 }
1341 rv = PR_FALSE;
1342 continue;
1343 }
1345 (void)memset(utf8, 0, sizeof(utf8));
1346 back[0] = back[1] = 0;
1348 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1349 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
1351 if( !result ) {
1352 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n",
1353 e->c);
1354 rv = PR_FALSE;
1355 continue;
1356 }
1358 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1359 utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
1361 if( 4 != len ) {
1362 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
1363 "unexpected len %d\n", e->c, len);
1364 rv = PR_FALSE;
1365 continue;
1366 }
1368 utf8[len] = '\0'; /* null-terminate for printing */
1370 if( !result ) {
1371 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
1372 rv = PR_FALSE;
1373 continue;
1374 }
1376 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
1377 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
1378 dump_utf8("", utf8, "and then to UTF-16:");
1379 if( sizeof(back) != len ) {
1380 fprintf(stdout, "len is %d\n", len);
1381 } else {
1382 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n",
1383 e->w[0], e->w[1], back[0], back[1]);
1384 }
1385 rv = PR_FALSE;
1386 continue;
1387 }
1388 }
1390 return rv;
1391 }
1393 static PRBool
1394 test_utf8_bad_chars
1395 (
1396 void
1397 )
1398 {
1399 PRBool rv = PR_TRUE;
1400 int i;
1402 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
1403 PRBool result;
1404 unsigned char destbuf[30];
1405 unsigned int len = 0;
1407 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1408 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
1410 if( result ) {
1411 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n");
1412 rv = PR_FALSE;
1413 continue;
1414 }
1415 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1416 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
1418 if( result ) {
1419 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n");
1420 rv = PR_FALSE;
1421 continue;
1422 }
1424 }
1426 return rv;
1427 }
1429 static PRBool
1430 test_iso88591_chars
1431 (
1432 void
1433 )
1434 {
1435 PRBool rv = PR_TRUE;
1436 int i;
1438 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1439 struct ucs2 *e = &ucs2[i];
1440 PRBool result;
1441 unsigned char iso88591;
1442 unsigned char utf8[3];
1443 unsigned int len = 0;
1445 if (ntohs(e->c) > 0xFF) continue;
1447 (void)memset(utf8, 0, sizeof(utf8));
1448 iso88591 = ntohs(e->c);
1450 result = sec_port_iso88591_utf8_conversion_function(&iso88591,
1451 1, utf8, sizeof(utf8), &len);
1453 if( !result ) {
1454 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591);
1455 rv = PR_FALSE;
1456 continue;
1457 }
1459 if( (len >= sizeof(utf8)) ||
1460 (strlen(e->utf8) != len) ||
1461 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
1462 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591);
1463 dump_utf8("expected", e->utf8, ", ");
1464 dump_utf8("received", utf8, "\n");
1465 rv = PR_FALSE;
1466 continue;
1467 }
1469 }
1471 return rv;
1472 }
1474 static PRBool
1475 test_zeroes
1476 (
1477 void
1478 )
1479 {
1480 PRBool rv = PR_TRUE;
1481 PRBool result;
1482 PRUint32 lzero = 0;
1483 PRUint16 szero = 0;
1484 unsigned char utf8[8];
1485 unsigned int len = 0;
1486 PRUint32 lback = 1;
1487 PRUint16 sback = 1;
1489 (void)memset(utf8, 1, sizeof(utf8));
1491 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1492 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
1494 if( !result ) {
1495 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
1496 rv = PR_FALSE;
1497 } else if( 1 != len ) {
1498 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
1499 rv = PR_FALSE;
1500 } else if( '\0' != *utf8 ) {
1501 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
1502 "received %02.2x\n", (unsigned int)*utf8);
1503 rv = PR_FALSE;
1504 }
1506 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1507 "", 1, (unsigned char *)&lback, sizeof(lback), &len);
1509 if( !result ) {
1510 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
1511 rv = PR_FALSE;
1512 } else if( 4 != len ) {
1513 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
1514 rv = PR_FALSE;
1515 } else if( 0 != lback ) {
1516 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
1517 "expected 0x00000000, received 0x%08.8x\n", lback);
1518 rv = PR_FALSE;
1519 }
1521 (void)memset(utf8, 1, sizeof(utf8));
1523 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1524 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
1526 if( !result ) {
1527 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
1528 rv = PR_FALSE;
1529 } else if( 1 != len ) {
1530 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
1531 rv = PR_FALSE;
1532 } else if( '\0' != *utf8 ) {
1533 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
1534 "received %02.2x\n", (unsigned int)*utf8);
1535 rv = PR_FALSE;
1536 }
1538 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1539 "", 1, (unsigned char *)&sback, sizeof(sback), &len);
1541 if( !result ) {
1542 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
1543 rv = PR_FALSE;
1544 } else if( 2 != len ) {
1545 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
1546 rv = PR_FALSE;
1547 } else if( 0 != sback ) {
1548 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
1549 "expected 0x0000, received 0x%04.4x\n", sback);
1550 rv = PR_FALSE;
1551 }
1553 return rv;
1554 }
1556 static PRBool
1557 test_multichars
1558 (
1559 void
1560 )
1561 {
1562 int i;
1563 unsigned int len, lenout;
1564 PRUint32 *ucs4s;
1565 char *ucs4_utf8;
1566 PRUint16 *ucs2s;
1567 char *ucs2_utf8;
1568 void *tmp;
1569 PRBool result;
1571 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
1572 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
1574 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
1575 fprintf(stderr, "out of memory\n");
1576 exit(1);
1577 }
1579 len = 0;
1580 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1581 ucs4s[i] = ucs4[i].c;
1582 len += strlen(ucs4[i].utf8);
1583 }
1585 ucs4_utf8 = (char *)malloc(len);
1587 len = 0;
1588 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1589 ucs2s[i] = ucs2[i].c;
1590 len += strlen(ucs2[i].utf8);
1591 }
1593 ucs2_utf8 = (char *)malloc(len);
1595 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
1596 fprintf(stderr, "out of memory\n");
1597 exit(1);
1598 }
1600 *ucs4_utf8 = '\0';
1601 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1602 strcat(ucs4_utf8, ucs4[i].utf8);
1603 }
1605 *ucs2_utf8 = '\0';
1606 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1607 strcat(ucs2_utf8, ucs2[i].utf8);
1608 }
1610 /* UTF-8 -> UCS-4 */
1611 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
1612 tmp = calloc(len, 1);
1613 if( (void *)NULL == tmp ) {
1614 fprintf(stderr, "out of memory\n");
1615 exit(1);
1616 }
1618 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
1619 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
1620 if( !result ) {
1621 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
1622 goto done;
1623 }
1625 if( lenout != len ) {
1626 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
1627 goto loser;
1628 }
1630 if( 0 != memcmp(ucs4s, tmp, len) ) {
1631 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
1632 goto loser;
1633 }
1635 free(tmp); tmp = (void *)NULL;
1637 /* UCS-4 -> UTF-8 */
1638 len = strlen(ucs4_utf8);
1639 tmp = calloc(len, 1);
1640 if( (void *)NULL == tmp ) {
1641 fprintf(stderr, "out of memory\n");
1642 exit(1);
1643 }
1645 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
1646 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32),
1647 tmp, len, &lenout);
1648 if( !result ) {
1649 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
1650 goto done;
1651 }
1653 if( lenout != len ) {
1654 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
1655 goto loser;
1656 }
1658 if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
1659 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
1660 goto loser;
1661 }
1663 free(tmp); tmp = (void *)NULL;
1665 /* UTF-8 -> UCS-2 */
1666 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
1667 tmp = calloc(len, 1);
1668 if( (void *)NULL == tmp ) {
1669 fprintf(stderr, "out of memory\n");
1670 exit(1);
1671 }
1673 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
1674 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
1675 if( !result ) {
1676 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
1677 goto done;
1678 }
1680 if( lenout != len ) {
1681 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
1682 goto loser;
1683 }
1685 if( 0 != memcmp(ucs2s, tmp, len) ) {
1686 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
1687 goto loser;
1688 }
1690 free(tmp); tmp = (void *)NULL;
1692 /* UCS-2 -> UTF-8 */
1693 len = strlen(ucs2_utf8);
1694 tmp = calloc(len, 1);
1695 if( (void *)NULL == tmp ) {
1696 fprintf(stderr, "out of memory\n");
1697 exit(1);
1698 }
1700 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
1701 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16),
1702 tmp, len, &lenout);
1703 if( !result ) {
1704 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
1705 goto done;
1706 }
1708 if( lenout != len ) {
1709 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
1710 goto loser;
1711 }
1713 if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
1714 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
1715 goto loser;
1716 }
1718 /* implement UTF16 */
1720 result = PR_TRUE;
1721 goto done;
1723 loser:
1724 result = PR_FALSE;
1725 done:
1726 free(ucs4s);
1727 free(ucs4_utf8);
1728 free(ucs2s);
1729 free(ucs2_utf8);
1730 if( (void *)NULL != tmp ) free(tmp);
1731 return result;
1732 }
1734 void
1735 byte_order
1736 (
1737 void
1738 )
1739 {
1740 /*
1741 * The implementation (now) expects the 16- and 32-bit characters
1742 * to be in network byte order, not host byte order. Therefore I
1743 * have to byteswap all those test vectors above. hton[ls] may be
1744 * functions, so I have to do this dynamically. If you want to
1745 * use this code to do host byte order conversions, just remove
1746 * the call in main() to this function.
1747 */
1749 int i;
1751 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
1752 struct ucs4 *e = &ucs4[i];
1753 e->c = htonl(e->c);
1754 }
1756 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
1757 struct ucs2 *e = &ucs2[i];
1758 e->c = htons(e->c);
1759 }
1761 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
1762 struct utf16 *e = &utf16[i];
1763 e->c = htonl(e->c);
1764 e->w[0] = htons(e->w[0]);
1765 e->w[1] = htons(e->w[1]);
1766 }
1768 return;
1769 }
1771 int
1772 main
1773 (
1774 int argc,
1775 char *argv[]
1776 )
1777 {
1778 byte_order();
1780 if( test_ucs4_chars() &&
1781 test_ucs2_chars() &&
1782 test_utf16_chars() &&
1783 test_utf8_bad_chars() &&
1784 test_iso88591_chars() &&
1785 test_zeroes() &&
1786 test_multichars() &&
1787 PR_TRUE ) {
1788 fprintf(stderr, "PASS\n");
1789 return 1;
1790 } else {
1791 fprintf(stderr, "FAIL\n");
1792 return 0;
1793 }
1794 }
1796 #endif /* TEST_UTF8 */