security/nss/lib/util/utf8.c

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* This Source Code Form is subject to the terms of the Mozilla Public
     2  * License, v. 2.0. If a copy of the MPL was not distributed with this
     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "seccomon.h"
     6 #include "secport.h"
     8 #ifdef TEST_UTF8
     9 #include <assert.h>
    10 #undef PORT_Assert
    11 #define PORT_Assert assert
    12 #endif
    14 /*
    15  * From RFC 2044:
    16  *
    17  * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
    18  * 0000 0000-0000 007F   0xxxxxxx
    19  * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
    20  * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
    21  * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    22  * 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    23  * 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
    24  */  
    26 /*
    27  * From http://www.imc.org/draft-hoffman-utf16
    28  *
    29  * For U on [0x00010000,0x0010FFFF]:  Let U' = U - 0x00010000
    30  *
    31  * U' = yyyyyyyyyyxxxxxxxxxx
    32  * W1 = 110110yyyyyyyyyy
    33  * W2 = 110111xxxxxxxxxx
    34  */
    36 /*
    37  * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
    38  * character values.  If you wish to use this code for working with
    39  * host byte order values, define the following:
    40  *
    41  * #if IS_BIG_ENDIAN
    42  * #define L_0 0
    43  * #define L_1 1
    44  * #define L_2 2
    45  * #define L_3 3
    46  * #define H_0 0
    47  * #define H_1 1
    48  * #else / * not everyone has elif * /
    49  * #if IS_LITTLE_ENDIAN
    50  * #define L_0 3
    51  * #define L_1 2
    52  * #define L_2 1
    53  * #define L_3 0
    54  * #define H_0 1
    55  * #define H_1 0
    56  * #else
    57  * #error "PDP and NUXI support deferred"
    58  * #endif / * IS_LITTLE_ENDIAN * /
    59  * #endif / * IS_BIG_ENDIAN * /
    60  */
    62 #define L_0 0
    63 #define L_1 1
    64 #define L_2 2
    65 #define L_3 3
    66 #define H_0 0
    67 #define H_1 1
    69 #define BAD_UTF8 ((PRUint32)-1)
    71 /*
    72  * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
    73  * of Unicode 4.0.0.
    74  *
    75  * Parameters:
    76  * index - Points to the byte offset in inBuf of character to read.  On success,
    77  *         updated to the offset of the following character.
    78  * inBuf - Input buffer, UTF-8 encoded
    79  * inbufLen - Length of input buffer, in bytes.
    80  *
    81  * Returns:
    82  * Success - The UCS4 encoded character
    83  * Failure - BAD_UTF8
    84  */
    85 static PRUint32
    86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
    87 {
    88   PRUint32 result;
    89   unsigned int i = *index;
    90   int bytes_left;
    91   PRUint32 min_value;
    93   PORT_Assert(i < inBufLen);
    95   if ( (inBuf[i] & 0x80) == 0x00 ) {
    96     result = inBuf[i++];
    97     bytes_left = 0;
    98     min_value = 0;
    99   } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
   100     result = inBuf[i++] & 0x1F;
   101     bytes_left = 1;
   102     min_value = 0x80;
   103   } else if ( (inBuf[i] & 0xF0) == 0xE0) {
   104     result = inBuf[i++] & 0x0F;
   105     bytes_left = 2;
   106     min_value = 0x800;
   107   } else if ( (inBuf[i] & 0xF8) == 0xF0) {
   108     result = inBuf[i++] & 0x07;
   109     bytes_left = 3;
   110     min_value = 0x10000;
   111   } else {
   112     return BAD_UTF8;
   113   }
   115   while (bytes_left--) {
   116     if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
   117     result = (result << 6) | (inBuf[i++] & 0x3F);
   118   }
   120   /* Check for overlong sequences, surrogates, and outside unicode range */
   121   if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
   122     return BAD_UTF8;
   123   }
   125   *index = i;
   126   return result;
   127 }
   129 PRBool
   130 sec_port_ucs4_utf8_conversion_function
   131 (
   132   PRBool toUnicode,
   133   unsigned char *inBuf,
   134   unsigned int inBufLen,
   135   unsigned char *outBuf,
   136   unsigned int maxOutBufLen,
   137   unsigned int *outBufLen
   138 )
   139 {
   140   PORT_Assert((unsigned int *)NULL != outBufLen);
   142   if( toUnicode ) {
   143     unsigned int i, len = 0;
   145     for( i = 0; i < inBufLen; ) {
   146       if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
   147       else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
   148       else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
   149       else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
   150       else return PR_FALSE;
   152       len += 4;
   153     }
   155     if( len > maxOutBufLen ) {
   156       *outBufLen = len;
   157       return PR_FALSE;
   158     }
   160     len = 0;
   162     for( i = 0; i < inBufLen; ) {
   163       PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
   165       if (ucs4 == BAD_UTF8) return PR_FALSE;
   167       outBuf[len+L_0] = 0x00;
   168       outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
   169       outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
   170       outBuf[len+L_3] = (unsigned char)ucs4;
   172       len += 4;
   173     }
   175     *outBufLen = len;
   176     return PR_TRUE;
   177   } else {
   178     unsigned int i, len = 0;
   179     PORT_Assert((inBufLen % 4) == 0);
   180     if ((inBufLen % 4) != 0) {
   181       *outBufLen = 0;
   182       return PR_FALSE;
   183     }
   185     for( i = 0; i < inBufLen; i += 4 ) {
   186       if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
   187 	*outBufLen = 0;
   188 	return PR_FALSE;
   189       } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
   190       else if( inBuf[i+L_2] >= 0x08 ) len += 3;
   191       else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
   192       else len += 1;
   193     }
   195     if( len > maxOutBufLen ) {
   196       *outBufLen = len;
   197       return PR_FALSE;
   198     }
   200     len = 0;
   202     for( i = 0; i < inBufLen; i += 4 ) {
   203       if( inBuf[i+L_1] >= 0x01 ) {
   204         /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
   205         /* 00000000 000abcde fghijklm nopqrstu ->
   206            11110abc 10defghi 10jklmno 10pqrstu */
   208         outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
   209         outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
   210                              | ((inBuf[i+L_2] & 0xF0) >> 4);
   211         outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
   212                              | ((inBuf[i+L_3] & 0xC0) >> 6);
   213         outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
   215         len += 4;
   216       } else if( inBuf[i+L_2] >= 0x08 ) {
   217         /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
   218         /* 00000000 00000000 abcdefgh ijklmnop ->
   219            1110abcd 10efghij 10klmnop */
   221         outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
   222         outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
   223                              | ((inBuf[i+L_3] & 0xC0) >> 6);
   224         outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
   226         len += 3;
   227       } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
   228         /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
   229         /* 00000000 00000000 00000abc defghijk ->
   230            110abcde 10fghijk */
   232         outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
   233                              | ((inBuf[i+L_3] & 0xC0) >> 6);
   234         outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
   236         len += 2;
   237       } else {
   238         /* 0000 0000-0000 007F -> 0xxxxxx */
   239         /* 00000000 00000000 00000000 0abcdefg ->
   240            0abcdefg */
   242         outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
   244         len += 1;
   245       }
   246     }
   248     *outBufLen = len;
   249     return PR_TRUE;
   250   }
   251 }
   253 PRBool
   254 sec_port_ucs2_utf8_conversion_function
   255 (
   256   PRBool toUnicode,
   257   unsigned char *inBuf,
   258   unsigned int inBufLen,
   259   unsigned char *outBuf,
   260   unsigned int maxOutBufLen,
   261   unsigned int *outBufLen
   262 )
   263 {
   264   PORT_Assert((unsigned int *)NULL != outBufLen);
   266   if( toUnicode ) {
   267     unsigned int i, len = 0;
   269     for( i = 0; i < inBufLen; ) {
   270       if( (inBuf[i] & 0x80) == 0x00 ) {
   271         i += 1;
   272         len += 2;
   273       } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
   274         i += 2;
   275         len += 2;
   276       } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
   277         i += 3;
   278         len += 2;
   279       } else if( (inBuf[i] & 0xF8) == 0xF0 ) { 
   280         i += 4;
   281         len += 4;
   282       } else return PR_FALSE;
   283     }
   285     if( len > maxOutBufLen ) {
   286       *outBufLen = len;
   287       return PR_FALSE;
   288     }
   290     len = 0;
   292     for( i = 0; i < inBufLen; ) {
   293       PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
   295       if (ucs4 == BAD_UTF8) return PR_FALSE;
   297       if( ucs4 < 0x10000) {
   298         outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
   299         outBuf[len+H_1] = (unsigned char)ucs4;
   300         len += 2;
   301       } else {
   302 	ucs4 -= 0x10000;
   303         outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
   304         outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
   305         outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
   306         outBuf[len+2+H_1] = (unsigned char)ucs4;
   307 	len += 4;
   308       }
   309     }
   311     *outBufLen = len;
   312     return PR_TRUE;
   313   } else {
   314     unsigned int i, len = 0;
   315     PORT_Assert((inBufLen % 2) == 0);
   316     if ((inBufLen % 2) != 0) {
   317       *outBufLen = 0;
   318       return PR_FALSE;
   319     }
   321     for( i = 0; i < inBufLen; i += 2 ) {
   322       if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
   323       else if( inBuf[i+H_0] < 0x08 ) len += 2;
   324       else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
   325         if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
   326           i += 2;
   327           len += 4;
   328         } else {
   329           return PR_FALSE;
   330         }
   331       }
   332       else len += 3;
   333     }
   335     if( len > maxOutBufLen ) {
   336       *outBufLen = len;
   337       return PR_FALSE;
   338     }
   340     len = 0;
   342     for( i = 0; i < inBufLen; i += 2 ) {
   343       if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
   344         /* 0000-007F -> 0xxxxxx */
   345         /* 00000000 0abcdefg -> 0abcdefg */
   347         outBuf[len] = inBuf[i+H_1] & 0x7F;
   349         len += 1;
   350       } else if( inBuf[i+H_0] < 0x08 ) {
   351         /* 0080-07FF -> 110xxxxx 10xxxxxx */
   352         /* 00000abc defghijk -> 110abcde 10fghijk */
   354         outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) 
   355                              | ((inBuf[i+H_1] & 0xC0) >> 6);
   356         outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
   358         len += 2;
   359       } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
   360         int abcde, BCDE;
   362         PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
   364         /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
   365         /* 110110BC DEfghijk 110111lm nopqrstu ->
   366            { Let abcde = BCDE + 1 }
   367            11110abc 10defghi 10jklmno 10pqrstu */
   369         BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
   370         abcde = BCDE + 1;
   372         outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
   373         outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) 
   374                              | ((inBuf[i+0+H_1] & 0x3C) >> 2);
   375         outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
   376                              | ((inBuf[i+2+H_0] & 0x03) << 2)
   377                              | ((inBuf[i+2+H_1] & 0xC0) >> 6);
   378         outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
   380         i += 2;
   381         len += 4;
   382       } else {
   383         /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
   384         /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
   386         outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
   387         outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) 
   388                              | ((inBuf[i+H_1] & 0xC0) >> 6);
   389         outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
   391         len += 3;
   392       }
   393     }
   395     *outBufLen = len;
   396     return PR_TRUE;
   397   }
   398 }
   400 PRBool
   401 sec_port_iso88591_utf8_conversion_function
   402 (
   403   const unsigned char *inBuf,
   404   unsigned int inBufLen,
   405   unsigned char *outBuf,
   406   unsigned int maxOutBufLen,
   407   unsigned int *outBufLen
   408 )
   409 {
   410   unsigned int i, len = 0;
   412   PORT_Assert((unsigned int *)NULL != outBufLen);
   414   for( i = 0; i < inBufLen; i++) {
   415     if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
   416     else len += 2;
   417   }
   419   if( len > maxOutBufLen ) {
   420     *outBufLen = len;
   421     return PR_FALSE;
   422   }
   424   len = 0;
   426   for( i = 0; i < inBufLen; i++) {
   427     if( (inBuf[i] & 0x80) == 0x00 ) {
   428       /* 00-7F -> 0xxxxxxx */
   429       /* 0abcdefg -> 0abcdefg */
   431       outBuf[len] = inBuf[i];
   432       len += 1;
   433     } else {
   434       /* 80-FF <- 110xxxxx 10xxxxxx */
   435       /* 00000000 abcdefgh -> 110000ab 10cdefgh */
   437       outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
   438       outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
   440       len += 2;
   441     }
   442   }
   444   *outBufLen = len;
   445   return PR_TRUE;
   446 }
   448 #ifdef TEST_UTF8
   450 #include <stdio.h>
   451 #include <string.h>
   452 #include <stdlib.h>
   453 #include <netinet/in.h> /* for htonl and htons */
   455 /*
   456  * UCS-4 vectors
   457  */
   459 struct ucs4 {
   460   PRUint32 c;
   461   char *utf8;
   462 };
   464 /*
   465  * UCS-2 vectors
   466  */
   468 struct ucs2 {
   469   PRUint16 c;
   470   char *utf8;
   471 };
   473 /*
   474  * UTF-16 vectors
   475  */
   477 struct utf16 {
   478   PRUint32 c;
   479   PRUint16 w[2];
   480 };
   483 /*
   484  * UCS-4 vectors
   485  */
   487 struct ucs4 ucs4[] = {
   488   { 0x00000001, "\x01" },
   489   { 0x00000002, "\x02" },
   490   { 0x00000003, "\x03" },
   491   { 0x00000004, "\x04" },
   492   { 0x00000007, "\x07" },
   493   { 0x00000008, "\x08" },
   494   { 0x0000000F, "\x0F" },
   495   { 0x00000010, "\x10" },
   496   { 0x0000001F, "\x1F" },
   497   { 0x00000020, "\x20" },
   498   { 0x0000003F, "\x3F" },
   499   { 0x00000040, "\x40" },
   500   { 0x0000007F, "\x7F" },
   502   { 0x00000080, "\xC2\x80" },
   503   { 0x00000081, "\xC2\x81" },
   504   { 0x00000082, "\xC2\x82" },
   505   { 0x00000084, "\xC2\x84" },
   506   { 0x00000088, "\xC2\x88" },
   507   { 0x00000090, "\xC2\x90" },
   508   { 0x000000A0, "\xC2\xA0" },
   509   { 0x000000C0, "\xC3\x80" },
   510   { 0x000000FF, "\xC3\xBF" },
   511   { 0x00000100, "\xC4\x80" },
   512   { 0x00000101, "\xC4\x81" },
   513   { 0x00000102, "\xC4\x82" },
   514   { 0x00000104, "\xC4\x84" },
   515   { 0x00000108, "\xC4\x88" },
   516   { 0x00000110, "\xC4\x90" },
   517   { 0x00000120, "\xC4\xA0" },
   518   { 0x00000140, "\xC5\x80" },
   519   { 0x00000180, "\xC6\x80" },
   520   { 0x000001FF, "\xC7\xBF" },
   521   { 0x00000200, "\xC8\x80" },
   522   { 0x00000201, "\xC8\x81" },
   523   { 0x00000202, "\xC8\x82" },
   524   { 0x00000204, "\xC8\x84" },
   525   { 0x00000208, "\xC8\x88" },
   526   { 0x00000210, "\xC8\x90" },
   527   { 0x00000220, "\xC8\xA0" },
   528   { 0x00000240, "\xC9\x80" },
   529   { 0x00000280, "\xCA\x80" },
   530   { 0x00000300, "\xCC\x80" },
   531   { 0x000003FF, "\xCF\xBF" },
   532   { 0x00000400, "\xD0\x80" },
   533   { 0x00000401, "\xD0\x81" },
   534   { 0x00000402, "\xD0\x82" },
   535   { 0x00000404, "\xD0\x84" },
   536   { 0x00000408, "\xD0\x88" },
   537   { 0x00000410, "\xD0\x90" },
   538   { 0x00000420, "\xD0\xA0" },
   539   { 0x00000440, "\xD1\x80" },
   540   { 0x00000480, "\xD2\x80" },
   541   { 0x00000500, "\xD4\x80" },
   542   { 0x00000600, "\xD8\x80" },
   543   { 0x000007FF, "\xDF\xBF" },
   545   { 0x00000800, "\xE0\xA0\x80" },
   546   { 0x00000801, "\xE0\xA0\x81" },
   547   { 0x00000802, "\xE0\xA0\x82" },
   548   { 0x00000804, "\xE0\xA0\x84" },
   549   { 0x00000808, "\xE0\xA0\x88" },
   550   { 0x00000810, "\xE0\xA0\x90" },
   551   { 0x00000820, "\xE0\xA0\xA0" },
   552   { 0x00000840, "\xE0\xA1\x80" },
   553   { 0x00000880, "\xE0\xA2\x80" },
   554   { 0x00000900, "\xE0\xA4\x80" },
   555   { 0x00000A00, "\xE0\xA8\x80" },
   556   { 0x00000C00, "\xE0\xB0\x80" },
   557   { 0x00000FFF, "\xE0\xBF\xBF" },
   558   { 0x00001000, "\xE1\x80\x80" },
   559   { 0x00001001, "\xE1\x80\x81" },
   560   { 0x00001002, "\xE1\x80\x82" },
   561   { 0x00001004, "\xE1\x80\x84" },
   562   { 0x00001008, "\xE1\x80\x88" },
   563   { 0x00001010, "\xE1\x80\x90" },
   564   { 0x00001020, "\xE1\x80\xA0" },
   565   { 0x00001040, "\xE1\x81\x80" },
   566   { 0x00001080, "\xE1\x82\x80" },
   567   { 0x00001100, "\xE1\x84\x80" },
   568   { 0x00001200, "\xE1\x88\x80" },
   569   { 0x00001400, "\xE1\x90\x80" },
   570   { 0x00001800, "\xE1\xA0\x80" },
   571   { 0x00001FFF, "\xE1\xBF\xBF" },
   572   { 0x00002000, "\xE2\x80\x80" },
   573   { 0x00002001, "\xE2\x80\x81" },
   574   { 0x00002002, "\xE2\x80\x82" },
   575   { 0x00002004, "\xE2\x80\x84" },
   576   { 0x00002008, "\xE2\x80\x88" },
   577   { 0x00002010, "\xE2\x80\x90" },
   578   { 0x00002020, "\xE2\x80\xA0" },
   579   { 0x00002040, "\xE2\x81\x80" },
   580   { 0x00002080, "\xE2\x82\x80" },
   581   { 0x00002100, "\xE2\x84\x80" },
   582   { 0x00002200, "\xE2\x88\x80" },
   583   { 0x00002400, "\xE2\x90\x80" },
   584   { 0x00002800, "\xE2\xA0\x80" },
   585   { 0x00003000, "\xE3\x80\x80" },
   586   { 0x00003FFF, "\xE3\xBF\xBF" },
   587   { 0x00004000, "\xE4\x80\x80" },
   588   { 0x00004001, "\xE4\x80\x81" },
   589   { 0x00004002, "\xE4\x80\x82" },
   590   { 0x00004004, "\xE4\x80\x84" },
   591   { 0x00004008, "\xE4\x80\x88" },
   592   { 0x00004010, "\xE4\x80\x90" },
   593   { 0x00004020, "\xE4\x80\xA0" },
   594   { 0x00004040, "\xE4\x81\x80" },
   595   { 0x00004080, "\xE4\x82\x80" },
   596   { 0x00004100, "\xE4\x84\x80" },
   597   { 0x00004200, "\xE4\x88\x80" },
   598   { 0x00004400, "\xE4\x90\x80" },
   599   { 0x00004800, "\xE4\xA0\x80" },
   600   { 0x00005000, "\xE5\x80\x80" },
   601   { 0x00006000, "\xE6\x80\x80" },
   602   { 0x00007FFF, "\xE7\xBF\xBF" },
   603   { 0x00008000, "\xE8\x80\x80" },
   604   { 0x00008001, "\xE8\x80\x81" },
   605   { 0x00008002, "\xE8\x80\x82" },
   606   { 0x00008004, "\xE8\x80\x84" },
   607   { 0x00008008, "\xE8\x80\x88" },
   608   { 0x00008010, "\xE8\x80\x90" },
   609   { 0x00008020, "\xE8\x80\xA0" },
   610   { 0x00008040, "\xE8\x81\x80" },
   611   { 0x00008080, "\xE8\x82\x80" },
   612   { 0x00008100, "\xE8\x84\x80" },
   613   { 0x00008200, "\xE8\x88\x80" },
   614   { 0x00008400, "\xE8\x90\x80" },
   615   { 0x00008800, "\xE8\xA0\x80" },
   616   { 0x00009000, "\xE9\x80\x80" },
   617   { 0x0000A000, "\xEA\x80\x80" },
   618   { 0x0000C000, "\xEC\x80\x80" },
   619   { 0x0000FFFF, "\xEF\xBF\xBF" },
   621   { 0x00010000, "\xF0\x90\x80\x80" },
   622   { 0x00010001, "\xF0\x90\x80\x81" },
   623   { 0x00010002, "\xF0\x90\x80\x82" },
   624   { 0x00010004, "\xF0\x90\x80\x84" },
   625   { 0x00010008, "\xF0\x90\x80\x88" },
   626   { 0x00010010, "\xF0\x90\x80\x90" },
   627   { 0x00010020, "\xF0\x90\x80\xA0" },
   628   { 0x00010040, "\xF0\x90\x81\x80" },
   629   { 0x00010080, "\xF0\x90\x82\x80" },
   630   { 0x00010100, "\xF0\x90\x84\x80" },
   631   { 0x00010200, "\xF0\x90\x88\x80" },
   632   { 0x00010400, "\xF0\x90\x90\x80" },
   633   { 0x00010800, "\xF0\x90\xA0\x80" },
   634   { 0x00011000, "\xF0\x91\x80\x80" },
   635   { 0x00012000, "\xF0\x92\x80\x80" },
   636   { 0x00014000, "\xF0\x94\x80\x80" },
   637   { 0x00018000, "\xF0\x98\x80\x80" },
   638   { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
   639   { 0x00020000, "\xF0\xA0\x80\x80" },
   640   { 0x00020001, "\xF0\xA0\x80\x81" },
   641   { 0x00020002, "\xF0\xA0\x80\x82" },
   642   { 0x00020004, "\xF0\xA0\x80\x84" },
   643   { 0x00020008, "\xF0\xA0\x80\x88" },
   644   { 0x00020010, "\xF0\xA0\x80\x90" },
   645   { 0x00020020, "\xF0\xA0\x80\xA0" },
   646   { 0x00020040, "\xF0\xA0\x81\x80" },
   647   { 0x00020080, "\xF0\xA0\x82\x80" },
   648   { 0x00020100, "\xF0\xA0\x84\x80" },
   649   { 0x00020200, "\xF0\xA0\x88\x80" },
   650   { 0x00020400, "\xF0\xA0\x90\x80" },
   651   { 0x00020800, "\xF0\xA0\xA0\x80" },
   652   { 0x00021000, "\xF0\xA1\x80\x80" },
   653   { 0x00022000, "\xF0\xA2\x80\x80" },
   654   { 0x00024000, "\xF0\xA4\x80\x80" },
   655   { 0x00028000, "\xF0\xA8\x80\x80" },
   656   { 0x00030000, "\xF0\xB0\x80\x80" },
   657   { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
   658   { 0x00040000, "\xF1\x80\x80\x80" },
   659   { 0x00040001, "\xF1\x80\x80\x81" },
   660   { 0x00040002, "\xF1\x80\x80\x82" },
   661   { 0x00040004, "\xF1\x80\x80\x84" },
   662   { 0x00040008, "\xF1\x80\x80\x88" },
   663   { 0x00040010, "\xF1\x80\x80\x90" },
   664   { 0x00040020, "\xF1\x80\x80\xA0" },
   665   { 0x00040040, "\xF1\x80\x81\x80" },
   666   { 0x00040080, "\xF1\x80\x82\x80" },
   667   { 0x00040100, "\xF1\x80\x84\x80" },
   668   { 0x00040200, "\xF1\x80\x88\x80" },
   669   { 0x00040400, "\xF1\x80\x90\x80" },
   670   { 0x00040800, "\xF1\x80\xA0\x80" },
   671   { 0x00041000, "\xF1\x81\x80\x80" },
   672   { 0x00042000, "\xF1\x82\x80\x80" },
   673   { 0x00044000, "\xF1\x84\x80\x80" },
   674   { 0x00048000, "\xF1\x88\x80\x80" },
   675   { 0x00050000, "\xF1\x90\x80\x80" },
   676   { 0x00060000, "\xF1\xA0\x80\x80" },
   677   { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
   678   { 0x00080000, "\xF2\x80\x80\x80" },
   679   { 0x00080001, "\xF2\x80\x80\x81" },
   680   { 0x00080002, "\xF2\x80\x80\x82" },
   681   { 0x00080004, "\xF2\x80\x80\x84" },
   682   { 0x00080008, "\xF2\x80\x80\x88" },
   683   { 0x00080010, "\xF2\x80\x80\x90" },
   684   { 0x00080020, "\xF2\x80\x80\xA0" },
   685   { 0x00080040, "\xF2\x80\x81\x80" },
   686   { 0x00080080, "\xF2\x80\x82\x80" },
   687   { 0x00080100, "\xF2\x80\x84\x80" },
   688   { 0x00080200, "\xF2\x80\x88\x80" },
   689   { 0x00080400, "\xF2\x80\x90\x80" },
   690   { 0x00080800, "\xF2\x80\xA0\x80" },
   691   { 0x00081000, "\xF2\x81\x80\x80" },
   692   { 0x00082000, "\xF2\x82\x80\x80" },
   693   { 0x00084000, "\xF2\x84\x80\x80" },
   694   { 0x00088000, "\xF2\x88\x80\x80" },
   695   { 0x00090000, "\xF2\x90\x80\x80" },
   696   { 0x000A0000, "\xF2\xA0\x80\x80" },
   697   { 0x000C0000, "\xF3\x80\x80\x80" },
   698   { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
   699   { 0x00100000, "\xF4\x80\x80\x80" },
   700   { 0x00100001, "\xF4\x80\x80\x81" },
   701   { 0x00100002, "\xF4\x80\x80\x82" },
   702   { 0x00100004, "\xF4\x80\x80\x84" },
   703   { 0x00100008, "\xF4\x80\x80\x88" },
   704   { 0x00100010, "\xF4\x80\x80\x90" },
   705   { 0x00100020, "\xF4\x80\x80\xA0" },
   706   { 0x00100040, "\xF4\x80\x81\x80" },
   707   { 0x00100080, "\xF4\x80\x82\x80" },
   708   { 0x00100100, "\xF4\x80\x84\x80" },
   709   { 0x00100200, "\xF4\x80\x88\x80" },
   710   { 0x00100400, "\xF4\x80\x90\x80" },
   711   { 0x00100800, "\xF4\x80\xA0\x80" },
   712   { 0x00101000, "\xF4\x81\x80\x80" },
   713   { 0x00102000, "\xF4\x82\x80\x80" },
   714   { 0x00104000, "\xF4\x84\x80\x80" },
   715   { 0x00108000, "\xF4\x88\x80\x80" },
   716   { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
   717 };
   719 /*
   720  * UCS-2 vectors
   721  */
   723 struct ucs2 ucs2[] = {
   724   { 0x0001, "\x01" },
   725   { 0x0002, "\x02" },
   726   { 0x0003, "\x03" },
   727   { 0x0004, "\x04" },
   728   { 0x0007, "\x07" },
   729   { 0x0008, "\x08" },
   730   { 0x000F, "\x0F" },
   731   { 0x0010, "\x10" },
   732   { 0x001F, "\x1F" },
   733   { 0x0020, "\x20" },
   734   { 0x003F, "\x3F" },
   735   { 0x0040, "\x40" },
   736   { 0x007F, "\x7F" },
   738   { 0x0080, "\xC2\x80" },
   739   { 0x0081, "\xC2\x81" },
   740   { 0x0082, "\xC2\x82" },
   741   { 0x0084, "\xC2\x84" },
   742   { 0x0088, "\xC2\x88" },
   743   { 0x0090, "\xC2\x90" },
   744   { 0x00A0, "\xC2\xA0" },
   745   { 0x00C0, "\xC3\x80" },
   746   { 0x00FF, "\xC3\xBF" },
   747   { 0x0100, "\xC4\x80" },
   748   { 0x0101, "\xC4\x81" },
   749   { 0x0102, "\xC4\x82" },
   750   { 0x0104, "\xC4\x84" },
   751   { 0x0108, "\xC4\x88" },
   752   { 0x0110, "\xC4\x90" },
   753   { 0x0120, "\xC4\xA0" },
   754   { 0x0140, "\xC5\x80" },
   755   { 0x0180, "\xC6\x80" },
   756   { 0x01FF, "\xC7\xBF" },
   757   { 0x0200, "\xC8\x80" },
   758   { 0x0201, "\xC8\x81" },
   759   { 0x0202, "\xC8\x82" },
   760   { 0x0204, "\xC8\x84" },
   761   { 0x0208, "\xC8\x88" },
   762   { 0x0210, "\xC8\x90" },
   763   { 0x0220, "\xC8\xA0" },
   764   { 0x0240, "\xC9\x80" },
   765   { 0x0280, "\xCA\x80" },
   766   { 0x0300, "\xCC\x80" },
   767   { 0x03FF, "\xCF\xBF" },
   768   { 0x0400, "\xD0\x80" },
   769   { 0x0401, "\xD0\x81" },
   770   { 0x0402, "\xD0\x82" },
   771   { 0x0404, "\xD0\x84" },
   772   { 0x0408, "\xD0\x88" },
   773   { 0x0410, "\xD0\x90" },
   774   { 0x0420, "\xD0\xA0" },
   775   { 0x0440, "\xD1\x80" },
   776   { 0x0480, "\xD2\x80" },
   777   { 0x0500, "\xD4\x80" },
   778   { 0x0600, "\xD8\x80" },
   779   { 0x07FF, "\xDF\xBF" },
   781   { 0x0800, "\xE0\xA0\x80" },
   782   { 0x0801, "\xE0\xA0\x81" },
   783   { 0x0802, "\xE0\xA0\x82" },
   784   { 0x0804, "\xE0\xA0\x84" },
   785   { 0x0808, "\xE0\xA0\x88" },
   786   { 0x0810, "\xE0\xA0\x90" },
   787   { 0x0820, "\xE0\xA0\xA0" },
   788   { 0x0840, "\xE0\xA1\x80" },
   789   { 0x0880, "\xE0\xA2\x80" },
   790   { 0x0900, "\xE0\xA4\x80" },
   791   { 0x0A00, "\xE0\xA8\x80" },
   792   { 0x0C00, "\xE0\xB0\x80" },
   793   { 0x0FFF, "\xE0\xBF\xBF" },
   794   { 0x1000, "\xE1\x80\x80" },
   795   { 0x1001, "\xE1\x80\x81" },
   796   { 0x1002, "\xE1\x80\x82" },
   797   { 0x1004, "\xE1\x80\x84" },
   798   { 0x1008, "\xE1\x80\x88" },
   799   { 0x1010, "\xE1\x80\x90" },
   800   { 0x1020, "\xE1\x80\xA0" },
   801   { 0x1040, "\xE1\x81\x80" },
   802   { 0x1080, "\xE1\x82\x80" },
   803   { 0x1100, "\xE1\x84\x80" },
   804   { 0x1200, "\xE1\x88\x80" },
   805   { 0x1400, "\xE1\x90\x80" },
   806   { 0x1800, "\xE1\xA0\x80" },
   807   { 0x1FFF, "\xE1\xBF\xBF" },
   808   { 0x2000, "\xE2\x80\x80" },
   809   { 0x2001, "\xE2\x80\x81" },
   810   { 0x2002, "\xE2\x80\x82" },
   811   { 0x2004, "\xE2\x80\x84" },
   812   { 0x2008, "\xE2\x80\x88" },
   813   { 0x2010, "\xE2\x80\x90" },
   814   { 0x2020, "\xE2\x80\xA0" },
   815   { 0x2040, "\xE2\x81\x80" },
   816   { 0x2080, "\xE2\x82\x80" },
   817   { 0x2100, "\xE2\x84\x80" },
   818   { 0x2200, "\xE2\x88\x80" },
   819   { 0x2400, "\xE2\x90\x80" },
   820   { 0x2800, "\xE2\xA0\x80" },
   821   { 0x3000, "\xE3\x80\x80" },
   822   { 0x3FFF, "\xE3\xBF\xBF" },
   823   { 0x4000, "\xE4\x80\x80" },
   824   { 0x4001, "\xE4\x80\x81" },
   825   { 0x4002, "\xE4\x80\x82" },
   826   { 0x4004, "\xE4\x80\x84" },
   827   { 0x4008, "\xE4\x80\x88" },
   828   { 0x4010, "\xE4\x80\x90" },
   829   { 0x4020, "\xE4\x80\xA0" },
   830   { 0x4040, "\xE4\x81\x80" },
   831   { 0x4080, "\xE4\x82\x80" },
   832   { 0x4100, "\xE4\x84\x80" },
   833   { 0x4200, "\xE4\x88\x80" },
   834   { 0x4400, "\xE4\x90\x80" },
   835   { 0x4800, "\xE4\xA0\x80" },
   836   { 0x5000, "\xE5\x80\x80" },
   837   { 0x6000, "\xE6\x80\x80" },
   838   { 0x7FFF, "\xE7\xBF\xBF" },
   839   { 0x8000, "\xE8\x80\x80" },
   840   { 0x8001, "\xE8\x80\x81" },
   841   { 0x8002, "\xE8\x80\x82" },
   842   { 0x8004, "\xE8\x80\x84" },
   843   { 0x8008, "\xE8\x80\x88" },
   844   { 0x8010, "\xE8\x80\x90" },
   845   { 0x8020, "\xE8\x80\xA0" },
   846   { 0x8040, "\xE8\x81\x80" },
   847   { 0x8080, "\xE8\x82\x80" },
   848   { 0x8100, "\xE8\x84\x80" },
   849   { 0x8200, "\xE8\x88\x80" },
   850   { 0x8400, "\xE8\x90\x80" },
   851   { 0x8800, "\xE8\xA0\x80" },
   852   { 0x9000, "\xE9\x80\x80" },
   853   { 0xA000, "\xEA\x80\x80" },
   854   { 0xC000, "\xEC\x80\x80" },
   855   { 0xFFFF, "\xEF\xBF\xBF" }
   857 };
   859 /*
   860  * UTF-16 vectors
   861  */
   863 struct utf16 utf16[] = {
   864   { 0x00010000, { 0xD800, 0xDC00 } },
   865   { 0x00010001, { 0xD800, 0xDC01 } },
   866   { 0x00010002, { 0xD800, 0xDC02 } },
   867   { 0x00010003, { 0xD800, 0xDC03 } },
   868   { 0x00010004, { 0xD800, 0xDC04 } },
   869   { 0x00010007, { 0xD800, 0xDC07 } },
   870   { 0x00010008, { 0xD800, 0xDC08 } },
   871   { 0x0001000F, { 0xD800, 0xDC0F } },
   872   { 0x00010010, { 0xD800, 0xDC10 } },
   873   { 0x0001001F, { 0xD800, 0xDC1F } },
   874   { 0x00010020, { 0xD800, 0xDC20 } },
   875   { 0x0001003F, { 0xD800, 0xDC3F } },
   876   { 0x00010040, { 0xD800, 0xDC40 } },
   877   { 0x0001007F, { 0xD800, 0xDC7F } },
   878   { 0x00010080, { 0xD800, 0xDC80 } },
   879   { 0x00010081, { 0xD800, 0xDC81 } },
   880   { 0x00010082, { 0xD800, 0xDC82 } },
   881   { 0x00010084, { 0xD800, 0xDC84 } },
   882   { 0x00010088, { 0xD800, 0xDC88 } },
   883   { 0x00010090, { 0xD800, 0xDC90 } },
   884   { 0x000100A0, { 0xD800, 0xDCA0 } },
   885   { 0x000100C0, { 0xD800, 0xDCC0 } },
   886   { 0x000100FF, { 0xD800, 0xDCFF } },
   887   { 0x00010100, { 0xD800, 0xDD00 } },
   888   { 0x00010101, { 0xD800, 0xDD01 } },
   889   { 0x00010102, { 0xD800, 0xDD02 } },
   890   { 0x00010104, { 0xD800, 0xDD04 } },
   891   { 0x00010108, { 0xD800, 0xDD08 } },
   892   { 0x00010110, { 0xD800, 0xDD10 } },
   893   { 0x00010120, { 0xD800, 0xDD20 } },
   894   { 0x00010140, { 0xD800, 0xDD40 } },
   895   { 0x00010180, { 0xD800, 0xDD80 } },
   896   { 0x000101FF, { 0xD800, 0xDDFF } },
   897   { 0x00010200, { 0xD800, 0xDE00 } },
   898   { 0x00010201, { 0xD800, 0xDE01 } },
   899   { 0x00010202, { 0xD800, 0xDE02 } },
   900   { 0x00010204, { 0xD800, 0xDE04 } },
   901   { 0x00010208, { 0xD800, 0xDE08 } },
   902   { 0x00010210, { 0xD800, 0xDE10 } },
   903   { 0x00010220, { 0xD800, 0xDE20 } },
   904   { 0x00010240, { 0xD800, 0xDE40 } },
   905   { 0x00010280, { 0xD800, 0xDE80 } },
   906   { 0x00010300, { 0xD800, 0xDF00 } },
   907   { 0x000103FF, { 0xD800, 0xDFFF } },
   908   { 0x00010400, { 0xD801, 0xDC00 } },
   909   { 0x00010401, { 0xD801, 0xDC01 } },
   910   { 0x00010402, { 0xD801, 0xDC02 } },
   911   { 0x00010404, { 0xD801, 0xDC04 } },
   912   { 0x00010408, { 0xD801, 0xDC08 } },
   913   { 0x00010410, { 0xD801, 0xDC10 } },
   914   { 0x00010420, { 0xD801, 0xDC20 } },
   915   { 0x00010440, { 0xD801, 0xDC40 } },
   916   { 0x00010480, { 0xD801, 0xDC80 } },
   917   { 0x00010500, { 0xD801, 0xDD00 } },
   918   { 0x00010600, { 0xD801, 0xDE00 } },
   919   { 0x000107FF, { 0xD801, 0xDFFF } },
   920   { 0x00010800, { 0xD802, 0xDC00 } },
   921   { 0x00010801, { 0xD802, 0xDC01 } },
   922   { 0x00010802, { 0xD802, 0xDC02 } },
   923   { 0x00010804, { 0xD802, 0xDC04 } },
   924   { 0x00010808, { 0xD802, 0xDC08 } },
   925   { 0x00010810, { 0xD802, 0xDC10 } },
   926   { 0x00010820, { 0xD802, 0xDC20 } },
   927   { 0x00010840, { 0xD802, 0xDC40 } },
   928   { 0x00010880, { 0xD802, 0xDC80 } },
   929   { 0x00010900, { 0xD802, 0xDD00 } },
   930   { 0x00010A00, { 0xD802, 0xDE00 } },
   931   { 0x00010C00, { 0xD803, 0xDC00 } },
   932   { 0x00010FFF, { 0xD803, 0xDFFF } },
   933   { 0x00011000, { 0xD804, 0xDC00 } },
   934   { 0x00011001, { 0xD804, 0xDC01 } },
   935   { 0x00011002, { 0xD804, 0xDC02 } },
   936   { 0x00011004, { 0xD804, 0xDC04 } },
   937   { 0x00011008, { 0xD804, 0xDC08 } },
   938   { 0x00011010, { 0xD804, 0xDC10 } },
   939   { 0x00011020, { 0xD804, 0xDC20 } },
   940   { 0x00011040, { 0xD804, 0xDC40 } },
   941   { 0x00011080, { 0xD804, 0xDC80 } },
   942   { 0x00011100, { 0xD804, 0xDD00 } },
   943   { 0x00011200, { 0xD804, 0xDE00 } },
   944   { 0x00011400, { 0xD805, 0xDC00 } },
   945   { 0x00011800, { 0xD806, 0xDC00 } },
   946   { 0x00011FFF, { 0xD807, 0xDFFF } },
   947   { 0x00012000, { 0xD808, 0xDC00 } },
   948   { 0x00012001, { 0xD808, 0xDC01 } },
   949   { 0x00012002, { 0xD808, 0xDC02 } },
   950   { 0x00012004, { 0xD808, 0xDC04 } },
   951   { 0x00012008, { 0xD808, 0xDC08 } },
   952   { 0x00012010, { 0xD808, 0xDC10 } },
   953   { 0x00012020, { 0xD808, 0xDC20 } },
   954   { 0x00012040, { 0xD808, 0xDC40 } },
   955   { 0x00012080, { 0xD808, 0xDC80 } },
   956   { 0x00012100, { 0xD808, 0xDD00 } },
   957   { 0x00012200, { 0xD808, 0xDE00 } },
   958   { 0x00012400, { 0xD809, 0xDC00 } },
   959   { 0x00012800, { 0xD80A, 0xDC00 } },
   960   { 0x00013000, { 0xD80C, 0xDC00 } },
   961   { 0x00013FFF, { 0xD80F, 0xDFFF } },
   962   { 0x00014000, { 0xD810, 0xDC00 } },
   963   { 0x00014001, { 0xD810, 0xDC01 } },
   964   { 0x00014002, { 0xD810, 0xDC02 } },
   965   { 0x00014004, { 0xD810, 0xDC04 } },
   966   { 0x00014008, { 0xD810, 0xDC08 } },
   967   { 0x00014010, { 0xD810, 0xDC10 } },
   968   { 0x00014020, { 0xD810, 0xDC20 } },
   969   { 0x00014040, { 0xD810, 0xDC40 } },
   970   { 0x00014080, { 0xD810, 0xDC80 } },
   971   { 0x00014100, { 0xD810, 0xDD00 } },
   972   { 0x00014200, { 0xD810, 0xDE00 } },
   973   { 0x00014400, { 0xD811, 0xDC00 } },
   974   { 0x00014800, { 0xD812, 0xDC00 } },
   975   { 0x00015000, { 0xD814, 0xDC00 } },
   976   { 0x00016000, { 0xD818, 0xDC00 } },
   977   { 0x00017FFF, { 0xD81F, 0xDFFF } },
   978   { 0x00018000, { 0xD820, 0xDC00 } },
   979   { 0x00018001, { 0xD820, 0xDC01 } },
   980   { 0x00018002, { 0xD820, 0xDC02 } },
   981   { 0x00018004, { 0xD820, 0xDC04 } },
   982   { 0x00018008, { 0xD820, 0xDC08 } },
   983   { 0x00018010, { 0xD820, 0xDC10 } },
   984   { 0x00018020, { 0xD820, 0xDC20 } },
   985   { 0x00018040, { 0xD820, 0xDC40 } },
   986   { 0x00018080, { 0xD820, 0xDC80 } },
   987   { 0x00018100, { 0xD820, 0xDD00 } },
   988   { 0x00018200, { 0xD820, 0xDE00 } },
   989   { 0x00018400, { 0xD821, 0xDC00 } },
   990   { 0x00018800, { 0xD822, 0xDC00 } },
   991   { 0x00019000, { 0xD824, 0xDC00 } },
   992   { 0x0001A000, { 0xD828, 0xDC00 } },
   993   { 0x0001C000, { 0xD830, 0xDC00 } },
   994   { 0x0001FFFF, { 0xD83F, 0xDFFF } },
   995   { 0x00020000, { 0xD840, 0xDC00 } },
   996   { 0x00020001, { 0xD840, 0xDC01 } },
   997   { 0x00020002, { 0xD840, 0xDC02 } },
   998   { 0x00020004, { 0xD840, 0xDC04 } },
   999   { 0x00020008, { 0xD840, 0xDC08 } },
  1000   { 0x00020010, { 0xD840, 0xDC10 } },
  1001   { 0x00020020, { 0xD840, 0xDC20 } },
  1002   { 0x00020040, { 0xD840, 0xDC40 } },
  1003   { 0x00020080, { 0xD840, 0xDC80 } },
  1004   { 0x00020100, { 0xD840, 0xDD00 } },
  1005   { 0x00020200, { 0xD840, 0xDE00 } },
  1006   { 0x00020400, { 0xD841, 0xDC00 } },
  1007   { 0x00020800, { 0xD842, 0xDC00 } },
  1008   { 0x00021000, { 0xD844, 0xDC00 } },
  1009   { 0x00022000, { 0xD848, 0xDC00 } },
  1010   { 0x00024000, { 0xD850, 0xDC00 } },
  1011   { 0x00028000, { 0xD860, 0xDC00 } },
  1012   { 0x0002FFFF, { 0xD87F, 0xDFFF } },
  1013   { 0x00030000, { 0xD880, 0xDC00 } },
  1014   { 0x00030001, { 0xD880, 0xDC01 } },
  1015   { 0x00030002, { 0xD880, 0xDC02 } },
  1016   { 0x00030004, { 0xD880, 0xDC04 } },
  1017   { 0x00030008, { 0xD880, 0xDC08 } },
  1018   { 0x00030010, { 0xD880, 0xDC10 } },
  1019   { 0x00030020, { 0xD880, 0xDC20 } },
  1020   { 0x00030040, { 0xD880, 0xDC40 } },
  1021   { 0x00030080, { 0xD880, 0xDC80 } },
  1022   { 0x00030100, { 0xD880, 0xDD00 } },
  1023   { 0x00030200, { 0xD880, 0xDE00 } },
  1024   { 0x00030400, { 0xD881, 0xDC00 } },
  1025   { 0x00030800, { 0xD882, 0xDC00 } },
  1026   { 0x00031000, { 0xD884, 0xDC00 } },
  1027   { 0x00032000, { 0xD888, 0xDC00 } },
  1028   { 0x00034000, { 0xD890, 0xDC00 } },
  1029   { 0x00038000, { 0xD8A0, 0xDC00 } },
  1030   { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
  1031   { 0x00040000, { 0xD8C0, 0xDC00 } },
  1032   { 0x00040001, { 0xD8C0, 0xDC01 } },
  1033   { 0x00040002, { 0xD8C0, 0xDC02 } },
  1034   { 0x00040004, { 0xD8C0, 0xDC04 } },
  1035   { 0x00040008, { 0xD8C0, 0xDC08 } },
  1036   { 0x00040010, { 0xD8C0, 0xDC10 } },
  1037   { 0x00040020, { 0xD8C0, 0xDC20 } },
  1038   { 0x00040040, { 0xD8C0, 0xDC40 } },
  1039   { 0x00040080, { 0xD8C0, 0xDC80 } },
  1040   { 0x00040100, { 0xD8C0, 0xDD00 } },
  1041   { 0x00040200, { 0xD8C0, 0xDE00 } },
  1042   { 0x00040400, { 0xD8C1, 0xDC00 } },
  1043   { 0x00040800, { 0xD8C2, 0xDC00 } },
  1044   { 0x00041000, { 0xD8C4, 0xDC00 } },
  1045   { 0x00042000, { 0xD8C8, 0xDC00 } },
  1046   { 0x00044000, { 0xD8D0, 0xDC00 } },
  1047   { 0x00048000, { 0xD8E0, 0xDC00 } },
  1048   { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
  1049   { 0x00050000, { 0xD900, 0xDC00 } },
  1050   { 0x00050001, { 0xD900, 0xDC01 } },
  1051   { 0x00050002, { 0xD900, 0xDC02 } },
  1052   { 0x00050004, { 0xD900, 0xDC04 } },
  1053   { 0x00050008, { 0xD900, 0xDC08 } },
  1054   { 0x00050010, { 0xD900, 0xDC10 } },
  1055   { 0x00050020, { 0xD900, 0xDC20 } },
  1056   { 0x00050040, { 0xD900, 0xDC40 } },
  1057   { 0x00050080, { 0xD900, 0xDC80 } },
  1058   { 0x00050100, { 0xD900, 0xDD00 } },
  1059   { 0x00050200, { 0xD900, 0xDE00 } },
  1060   { 0x00050400, { 0xD901, 0xDC00 } },
  1061   { 0x00050800, { 0xD902, 0xDC00 } },
  1062   { 0x00051000, { 0xD904, 0xDC00 } },
  1063   { 0x00052000, { 0xD908, 0xDC00 } },
  1064   { 0x00054000, { 0xD910, 0xDC00 } },
  1065   { 0x00058000, { 0xD920, 0xDC00 } },
  1066   { 0x00060000, { 0xD940, 0xDC00 } },
  1067   { 0x00070000, { 0xD980, 0xDC00 } },
  1068   { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
  1069   { 0x00080000, { 0xD9C0, 0xDC00 } },
  1070   { 0x00080001, { 0xD9C0, 0xDC01 } },
  1071   { 0x00080002, { 0xD9C0, 0xDC02 } },
  1072   { 0x00080004, { 0xD9C0, 0xDC04 } },
  1073   { 0x00080008, { 0xD9C0, 0xDC08 } },
  1074   { 0x00080010, { 0xD9C0, 0xDC10 } },
  1075   { 0x00080020, { 0xD9C0, 0xDC20 } },
  1076   { 0x00080040, { 0xD9C0, 0xDC40 } },
  1077   { 0x00080080, { 0xD9C0, 0xDC80 } },
  1078   { 0x00080100, { 0xD9C0, 0xDD00 } },
  1079   { 0x00080200, { 0xD9C0, 0xDE00 } },
  1080   { 0x00080400, { 0xD9C1, 0xDC00 } },
  1081   { 0x00080800, { 0xD9C2, 0xDC00 } },
  1082   { 0x00081000, { 0xD9C4, 0xDC00 } },
  1083   { 0x00082000, { 0xD9C8, 0xDC00 } },
  1084   { 0x00084000, { 0xD9D0, 0xDC00 } },
  1085   { 0x00088000, { 0xD9E0, 0xDC00 } },
  1086   { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
  1087   { 0x00090000, { 0xDA00, 0xDC00 } },
  1088   { 0x00090001, { 0xDA00, 0xDC01 } },
  1089   { 0x00090002, { 0xDA00, 0xDC02 } },
  1090   { 0x00090004, { 0xDA00, 0xDC04 } },
  1091   { 0x00090008, { 0xDA00, 0xDC08 } },
  1092   { 0x00090010, { 0xDA00, 0xDC10 } },
  1093   { 0x00090020, { 0xDA00, 0xDC20 } },
  1094   { 0x00090040, { 0xDA00, 0xDC40 } },
  1095   { 0x00090080, { 0xDA00, 0xDC80 } },
  1096   { 0x00090100, { 0xDA00, 0xDD00 } },
  1097   { 0x00090200, { 0xDA00, 0xDE00 } },
  1098   { 0x00090400, { 0xDA01, 0xDC00 } },
  1099   { 0x00090800, { 0xDA02, 0xDC00 } },
  1100   { 0x00091000, { 0xDA04, 0xDC00 } },
  1101   { 0x00092000, { 0xDA08, 0xDC00 } },
  1102   { 0x00094000, { 0xDA10, 0xDC00 } },
  1103   { 0x00098000, { 0xDA20, 0xDC00 } },
  1104   { 0x000A0000, { 0xDA40, 0xDC00 } },
  1105   { 0x000B0000, { 0xDA80, 0xDC00 } },
  1106   { 0x000C0000, { 0xDAC0, 0xDC00 } },
  1107   { 0x000D0000, { 0xDB00, 0xDC00 } },
  1108   { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
  1109   { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
  1111 };
  1113 /* illegal utf8 sequences */
  1114 char *utf8_bad[] = {
  1115   "\xC0\x80",
  1116   "\xC1\xBF",
  1117   "\xE0\x80\x80",
  1118   "\xE0\x9F\xBF",
  1119   "\xF0\x80\x80\x80",
  1120   "\xF0\x8F\xBF\xBF",
  1121   "\xF4\x90\x80\x80",
  1122   "\xF7\xBF\xBF\xBF",
  1123   "\xF8\x80\x80\x80\x80",
  1124   "\xF8\x88\x80\x80\x80",
  1125   "\xF8\x92\x80\x80\x80",
  1126   "\xF8\x9F\xBF\xBF\xBF",
  1127   "\xF8\xA0\x80\x80\x80",
  1128   "\xF8\xA8\x80\x80\x80",
  1129   "\xF8\xB0\x80\x80\x80",
  1130   "\xF8\xBF\xBF\xBF\xBF",
  1131   "\xF9\x80\x80\x80\x88",
  1132   "\xF9\x84\x80\x80\x80",
  1133   "\xF9\xBF\xBF\xBF\xBF",
  1134   "\xFA\x80\x80\x80\x80",
  1135   "\xFA\x90\x80\x80\x80",
  1136   "\xFB\xBF\xBF\xBF\xBF",
  1137   "\xFC\x84\x80\x80\x80\x81",
  1138   "\xFC\x85\x80\x80\x80\x80",
  1139   "\xFC\x86\x80\x80\x80\x80",
  1140   "\xFC\x87\xBF\xBF\xBF\xBF",
  1141   "\xFC\x88\xA0\x80\x80\x80",
  1142   "\xFC\x89\x80\x80\x80\x80",
  1143   "\xFC\x8A\x80\x80\x80\x80",
  1144   "\xFC\x90\x80\x80\x80\x82",
  1145   "\xFD\x80\x80\x80\x80\x80",
  1146   "\xFD\xBF\xBF\xBF\xBF\xBF",
  1147   "\x80",
  1148   "\xC3",
  1149   "\xC3\xC3\x80",
  1150   "\xED\xA0\x80",
  1151   "\xED\xBF\x80",
  1152   "\xED\xBF\xBF",
  1153   "\xED\xA0\x80\xE0\xBF\xBF",
  1154 };
  1156 static void
  1157 dump_utf8
  1159   char *word,
  1160   unsigned char *utf8,
  1161   char *end
  1164   fprintf(stdout, "%s ", word);
  1165   for( ; *utf8; utf8++ ) {
  1166     fprintf(stdout, "%02.2x ", (unsigned int)*utf8);
  1168   fprintf(stdout, "%s", end);
  1171 static PRBool
  1172 test_ucs4_chars
  1174   void
  1177   PRBool rv = PR_TRUE;
  1178   int i;
  1180   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1181     struct ucs4 *e = &ucs4[i];
  1182     PRBool result;
  1183     unsigned char utf8[8];
  1184     unsigned int len = 0;
  1185     PRUint32 back = 0;
  1187     (void)memset(utf8, 0, sizeof(utf8));
  1189     result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 
  1190       (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
  1192     if( !result ) {
  1193       fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
  1194       rv = PR_FALSE;
  1195       continue;
  1198     if( (len >= sizeof(utf8)) ||
  1199         (strlen(e->utf8) != len) ||
  1200         (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
  1201       fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
  1202       dump_utf8("expected", e->utf8, ", ");
  1203       dump_utf8("received", utf8, "\n");
  1204       rv = PR_FALSE;
  1205       continue;
  1208     result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1209       utf8, len, (unsigned char *)&back, sizeof(back), &len);
  1211     if( !result ) {
  1212       dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
  1213       rv = PR_FALSE;
  1214       continue;
  1217     if( (sizeof(back) != len) || (e->c != back) ) {
  1218       dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
  1219       fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
  1220       rv = PR_FALSE;
  1221       continue;
  1225   return rv;
  1228 static PRBool
  1229 test_ucs2_chars
  1231   void
  1234   PRBool rv = PR_TRUE;
  1235   int i;
  1237   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1238     struct ucs2 *e = &ucs2[i];
  1239     PRBool result;
  1240     unsigned char utf8[8];
  1241     unsigned int len = 0;
  1242     PRUint16 back = 0;
  1244     (void)memset(utf8, 0, sizeof(utf8));
  1246     result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
  1247       (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
  1249     if( !result ) {
  1250       fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
  1251       rv = PR_FALSE;
  1252       continue;
  1255     if( (len >= sizeof(utf8)) ||
  1256         (strlen(e->utf8) != len) ||
  1257         (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
  1258       fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
  1259       dump_utf8("expected", e->utf8, ", ");
  1260       dump_utf8("received", utf8, "\n");
  1261       rv = PR_FALSE;
  1262       continue;
  1265     result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1266       utf8, len, (unsigned char *)&back, sizeof(back), &len);
  1268     if( !result ) {
  1269       dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
  1270       rv = PR_FALSE;
  1271       continue;
  1274     if( (sizeof(back) != len) || (e->c != back) ) {
  1275       dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
  1276       fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
  1277       rv = PR_FALSE;
  1278       continue;
  1282   return rv;
  1285 static PRBool
  1286 test_utf16_chars
  1288   void
  1291   PRBool rv = PR_TRUE;
  1292   int i;
  1294   for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
  1295     struct utf16 *e = &utf16[i];
  1296     PRBool result;
  1297     unsigned char utf8[8];
  1298     unsigned int len = 0;
  1299     PRUint32 back32 = 0;
  1300     PRUint16 back[2];
  1302     (void)memset(utf8, 0, sizeof(utf8));
  1304     result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 
  1305       (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
  1307     if( !result ) {
  1308       fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n", 
  1309               e->w[0], e->w[1]);
  1310       rv = PR_FALSE;
  1311       continue;
  1314     result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1315       utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
  1317     if( 4 != len ) {
  1318       fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
  1319               "unexpected len %d\n", e->w[0], e->w[1], len);
  1320       rv = PR_FALSE;
  1321       continue;
  1324     utf8[len] = '\0'; /* null-terminate for printing */
  1326     if( !result ) {
  1327       dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
  1328       rv = PR_FALSE;
  1329       continue;
  1332     if( (sizeof(back32) != len) || (e->c != back32) ) {
  1333       fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ", 
  1334               e->w[0], e->w[1]);
  1335       dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
  1336       if( sizeof(back32) != len ) {
  1337         fprintf(stdout, "len is %d\n", len);
  1338       } else {
  1339         fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
  1341       rv = PR_FALSE;
  1342       continue;
  1345     (void)memset(utf8, 0, sizeof(utf8));
  1346     back[0] = back[1] = 0;
  1348     result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
  1349       (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
  1351     if( !result ) {
  1352       fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n",
  1353               e->c);
  1354       rv = PR_FALSE;
  1355       continue;
  1358     result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1359       utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
  1361     if( 4 != len ) {
  1362       fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
  1363               "unexpected len %d\n", e->c, len);
  1364       rv = PR_FALSE;
  1365       continue;
  1368     utf8[len] = '\0'; /* null-terminate for printing */
  1370     if( !result ) {
  1371       dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
  1372       rv = PR_FALSE;
  1373       continue;
  1376     if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
  1377       fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
  1378       dump_utf8("", utf8, "and then to UTF-16:");
  1379       if( sizeof(back) != len ) {
  1380         fprintf(stdout, "len is %d\n", len);
  1381       } else {
  1382         fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n",
  1383                 e->w[0], e->w[1], back[0], back[1]);
  1385       rv = PR_FALSE;
  1386       continue;
  1390   return rv;
  1393 static PRBool
  1394 test_utf8_bad_chars
  1396   void
  1399   PRBool rv = PR_TRUE;
  1400   int i;
  1402   for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
  1403     PRBool result;
  1404     unsigned char destbuf[30];
  1405     unsigned int len = 0;
  1407     result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1408       (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
  1410     if( result ) {
  1411       dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n");
  1412       rv = PR_FALSE;
  1413       continue;
  1415     result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1416       (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
  1418     if( result ) {
  1419       dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n");
  1420       rv = PR_FALSE;
  1421       continue;
  1426   return rv;
  1429 static PRBool
  1430 test_iso88591_chars
  1432   void
  1435   PRBool rv = PR_TRUE;
  1436   int i;
  1438   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1439     struct ucs2 *e = &ucs2[i];
  1440     PRBool result;
  1441     unsigned char iso88591;
  1442     unsigned char utf8[3];
  1443     unsigned int len = 0;
  1445     if (ntohs(e->c) > 0xFF) continue;
  1447     (void)memset(utf8, 0, sizeof(utf8));
  1448     iso88591 = ntohs(e->c);
  1450     result = sec_port_iso88591_utf8_conversion_function(&iso88591,
  1451       1, utf8, sizeof(utf8), &len);
  1453     if( !result ) {
  1454       fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591);
  1455       rv = PR_FALSE;
  1456       continue;
  1459     if( (len >= sizeof(utf8)) ||
  1460         (strlen(e->utf8) != len) ||
  1461         (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
  1462       fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591);
  1463       dump_utf8("expected", e->utf8, ", ");
  1464       dump_utf8("received", utf8, "\n");
  1465       rv = PR_FALSE;
  1466       continue;
  1471   return rv;
  1474 static PRBool
  1475 test_zeroes
  1477   void
  1480   PRBool rv = PR_TRUE;
  1481   PRBool result;
  1482   PRUint32 lzero = 0;
  1483   PRUint16 szero = 0;
  1484   unsigned char utf8[8];
  1485   unsigned int len = 0;
  1486   PRUint32 lback = 1;
  1487   PRUint16 sback = 1;
  1489   (void)memset(utf8, 1, sizeof(utf8));
  1491   result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 
  1492     (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
  1494   if( !result ) {
  1495     fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
  1496     rv = PR_FALSE;
  1497   } else if( 1 != len ) {
  1498     fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
  1499     rv = PR_FALSE;
  1500   } else if( '\0' != *utf8 ) {
  1501     fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
  1502             "received %02.2x\n", (unsigned int)*utf8);
  1503     rv = PR_FALSE;
  1506   result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1507     "", 1, (unsigned char *)&lback, sizeof(lback), &len);
  1509   if( !result ) {
  1510     fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
  1511     rv = PR_FALSE;
  1512   } else if( 4 != len ) {
  1513     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
  1514     rv = PR_FALSE;
  1515   } else if( 0 != lback ) {
  1516     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
  1517             "expected 0x00000000, received 0x%08.8x\n", lback);
  1518     rv = PR_FALSE;
  1521   (void)memset(utf8, 1, sizeof(utf8));
  1523   result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 
  1524     (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
  1526   if( !result ) {
  1527     fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
  1528     rv = PR_FALSE;
  1529   } else if( 1 != len ) {
  1530     fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
  1531     rv = PR_FALSE;
  1532   } else if( '\0' != *utf8 ) {
  1533     fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
  1534             "received %02.2x\n", (unsigned int)*utf8);
  1535     rv = PR_FALSE;
  1538   result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1539     "", 1, (unsigned char *)&sback, sizeof(sback), &len);
  1541   if( !result ) {
  1542     fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
  1543     rv = PR_FALSE;
  1544   } else if( 2 != len ) {
  1545     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
  1546     rv = PR_FALSE;
  1547   } else if( 0 != sback ) {
  1548     fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
  1549             "expected 0x0000, received 0x%04.4x\n", sback);
  1550     rv = PR_FALSE;
  1553   return rv;
  1556 static PRBool
  1557 test_multichars
  1559   void
  1562   int i;
  1563   unsigned int len, lenout;
  1564   PRUint32 *ucs4s;
  1565   char *ucs4_utf8;
  1566   PRUint16 *ucs2s;
  1567   char *ucs2_utf8;
  1568   void *tmp;
  1569   PRBool result;
  1571   ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
  1572   ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
  1574   if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
  1575     fprintf(stderr, "out of memory\n");
  1576     exit(1);
  1579   len = 0;
  1580   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1581     ucs4s[i] = ucs4[i].c;
  1582     len += strlen(ucs4[i].utf8);
  1585   ucs4_utf8 = (char *)malloc(len);
  1587   len = 0;
  1588   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1589     ucs2s[i] = ucs2[i].c;
  1590     len += strlen(ucs2[i].utf8);
  1593   ucs2_utf8 = (char *)malloc(len);
  1595   if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
  1596     fprintf(stderr, "out of memory\n");
  1597     exit(1);
  1600   *ucs4_utf8 = '\0';
  1601   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1602     strcat(ucs4_utf8, ucs4[i].utf8);
  1605   *ucs2_utf8 = '\0';
  1606   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1607     strcat(ucs2_utf8, ucs2[i].utf8);
  1610   /* UTF-8 -> UCS-4 */
  1611   len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
  1612   tmp = calloc(len, 1);
  1613   if( (void *)NULL == tmp ) {
  1614     fprintf(stderr, "out of memory\n");
  1615     exit(1);
  1618   result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1619     ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
  1620   if( !result ) {
  1621     fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
  1622     goto done;
  1625   if( lenout != len ) {
  1626     fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
  1627     goto loser;
  1630   if( 0 != memcmp(ucs4s, tmp, len) ) {
  1631     fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
  1632     goto loser;
  1635   free(tmp); tmp = (void *)NULL;
  1637   /* UCS-4 -> UTF-8 */
  1638   len = strlen(ucs4_utf8);
  1639   tmp = calloc(len, 1);
  1640   if( (void *)NULL == tmp ) {
  1641     fprintf(stderr, "out of memory\n");
  1642     exit(1);
  1645   result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
  1646     (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), 
  1647     tmp, len, &lenout);
  1648   if( !result ) {
  1649     fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
  1650     goto done;
  1653   if( lenout != len ) {
  1654     fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
  1655     goto loser;
  1658   if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
  1659     fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
  1660     goto loser;
  1663   free(tmp); tmp = (void *)NULL;
  1665   /* UTF-8 -> UCS-2 */
  1666   len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
  1667   tmp = calloc(len, 1);
  1668   if( (void *)NULL == tmp ) {
  1669     fprintf(stderr, "out of memory\n");
  1670     exit(1);
  1673   result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1674     ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
  1675   if( !result ) {
  1676     fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
  1677     goto done;
  1680   if( lenout != len ) {
  1681     fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
  1682     goto loser;
  1685   if( 0 != memcmp(ucs2s, tmp, len) ) {
  1686     fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
  1687     goto loser;
  1690   free(tmp); tmp = (void *)NULL;
  1692   /* UCS-2 -> UTF-8 */
  1693   len = strlen(ucs2_utf8);
  1694   tmp = calloc(len, 1);
  1695   if( (void *)NULL == tmp ) {
  1696     fprintf(stderr, "out of memory\n");
  1697     exit(1);
  1700   result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
  1701     (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), 
  1702     tmp, len, &lenout);
  1703   if( !result ) {
  1704     fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
  1705     goto done;
  1708   if( lenout != len ) {
  1709     fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
  1710     goto loser;
  1713   if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
  1714     fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
  1715     goto loser;
  1718   /* implement UTF16 */
  1720   result = PR_TRUE;
  1721   goto done;
  1723  loser:
  1724   result = PR_FALSE;
  1725  done:
  1726   free(ucs4s);
  1727   free(ucs4_utf8);
  1728   free(ucs2s);
  1729   free(ucs2_utf8);
  1730   if( (void *)NULL != tmp ) free(tmp);
  1731   return result;
  1734 void
  1735 byte_order
  1737   void
  1740   /*
  1741    * The implementation (now) expects the 16- and 32-bit characters
  1742    * to be in network byte order, not host byte order.  Therefore I
  1743    * have to byteswap all those test vectors above.  hton[ls] may be
  1744    * functions, so I have to do this dynamically.  If you want to 
  1745    * use this code to do host byte order conversions, just remove
  1746    * the call in main() to this function.
  1747    */
  1749   int i;
  1751   for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1752     struct ucs4 *e = &ucs4[i];
  1753     e->c = htonl(e->c);
  1756   for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1757     struct ucs2 *e = &ucs2[i];
  1758     e->c = htons(e->c);
  1761   for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
  1762     struct utf16 *e = &utf16[i];
  1763     e->c = htonl(e->c);
  1764     e->w[0] = htons(e->w[0]);
  1765     e->w[1] = htons(e->w[1]);
  1768   return;
  1771 int
  1772 main
  1774   int argc,
  1775   char *argv[]
  1778   byte_order();
  1780   if( test_ucs4_chars() &&
  1781       test_ucs2_chars() &&
  1782       test_utf16_chars() &&
  1783       test_utf8_bad_chars() &&
  1784       test_iso88591_chars() &&
  1785       test_zeroes() &&
  1786       test_multichars() &&
  1787       PR_TRUE ) {
  1788     fprintf(stderr, "PASS\n");
  1789     return 1;
  1790   } else {
  1791     fprintf(stderr, "FAIL\n");
  1792     return 0;
  1796 #endif /* TEST_UTF8 */

mercurial