security/nss/lib/util/utf8.c

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/nss/lib/util/utf8.c	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1796 @@
     1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.7 +
     1.8 +#include "seccomon.h"
     1.9 +#include "secport.h"
    1.10 +
    1.11 +#ifdef TEST_UTF8
    1.12 +#include <assert.h>
    1.13 +#undef PORT_Assert
    1.14 +#define PORT_Assert assert
    1.15 +#endif
    1.16 +
    1.17 +/*
    1.18 + * From RFC 2044:
    1.19 + *
    1.20 + * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
    1.21 + * 0000 0000-0000 007F   0xxxxxxx
    1.22 + * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
    1.23 + * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
    1.24 + * 0001 0000-001F FFFF   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    1.25 + * 0020 0000-03FF FFFF   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
    1.26 + * 0400 0000-7FFF FFFF   1111110x 10xxxxxx ... 10xxxxxx
    1.27 + */  
    1.28 +
    1.29 +/*
    1.30 + * From http://www.imc.org/draft-hoffman-utf16
    1.31 + *
    1.32 + * For U on [0x00010000,0x0010FFFF]:  Let U' = U - 0x00010000
    1.33 + *
    1.34 + * U' = yyyyyyyyyyxxxxxxxxxx
    1.35 + * W1 = 110110yyyyyyyyyy
    1.36 + * W2 = 110111xxxxxxxxxx
    1.37 + */
    1.38 +
    1.39 +/*
    1.40 + * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
    1.41 + * character values.  If you wish to use this code for working with
    1.42 + * host byte order values, define the following:
    1.43 + *
    1.44 + * #if IS_BIG_ENDIAN
    1.45 + * #define L_0 0
    1.46 + * #define L_1 1
    1.47 + * #define L_2 2
    1.48 + * #define L_3 3
    1.49 + * #define H_0 0
    1.50 + * #define H_1 1
    1.51 + * #else / * not everyone has elif * /
    1.52 + * #if IS_LITTLE_ENDIAN
    1.53 + * #define L_0 3
    1.54 + * #define L_1 2
    1.55 + * #define L_2 1
    1.56 + * #define L_3 0
    1.57 + * #define H_0 1
    1.58 + * #define H_1 0
    1.59 + * #else
    1.60 + * #error "PDP and NUXI support deferred"
    1.61 + * #endif / * IS_LITTLE_ENDIAN * /
    1.62 + * #endif / * IS_BIG_ENDIAN * /
    1.63 + */
    1.64 +
/*
 * Byte offsets used when reading or writing multi-byte character values.
 * L_0..L_3 index the four bytes of a 32-bit (UCS-4) value and H_0..H_1 the
 * two bytes of a 16-bit value.  With the values below, offset 0 is the most
 * significant byte, i.e. network (big-endian) byte order.
 */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3
#define H_0 0
#define H_1 1

/* Sentinel returned by sec_port_read_utf8() when decoding fails. */
#define BAD_UTF8 ((PRUint32)-1)
    1.73 +
    1.74 +/*
    1.75 + * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
    1.76 + * of Unicode 4.0.0.
    1.77 + *
    1.78 + * Parameters:
    1.79 + * index - Points to the byte offset in inBuf of character to read.  On success,
    1.80 + *         updated to the offset of the following character.
    1.81 + * inBuf - Input buffer, UTF-8 encoded
    1.82 + * inbufLen - Length of input buffer, in bytes.
    1.83 + *
    1.84 + * Returns:
    1.85 + * Success - The UCS4 encoded character
    1.86 + * Failure - BAD_UTF8
    1.87 + */
    1.88 +static PRUint32
    1.89 +sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
    1.90 +{
    1.91 +  PRUint32 result;
    1.92 +  unsigned int i = *index;
    1.93 +  int bytes_left;
    1.94 +  PRUint32 min_value;
    1.95 +
    1.96 +  PORT_Assert(i < inBufLen);
    1.97 +
    1.98 +  if ( (inBuf[i] & 0x80) == 0x00 ) {
    1.99 +    result = inBuf[i++];
   1.100 +    bytes_left = 0;
   1.101 +    min_value = 0;
   1.102 +  } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
   1.103 +    result = inBuf[i++] & 0x1F;
   1.104 +    bytes_left = 1;
   1.105 +    min_value = 0x80;
   1.106 +  } else if ( (inBuf[i] & 0xF0) == 0xE0) {
   1.107 +    result = inBuf[i++] & 0x0F;
   1.108 +    bytes_left = 2;
   1.109 +    min_value = 0x800;
   1.110 +  } else if ( (inBuf[i] & 0xF8) == 0xF0) {
   1.111 +    result = inBuf[i++] & 0x07;
   1.112 +    bytes_left = 3;
   1.113 +    min_value = 0x10000;
   1.114 +  } else {
   1.115 +    return BAD_UTF8;
   1.116 +  }
   1.117 +
   1.118 +  while (bytes_left--) {
   1.119 +    if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
   1.120 +    result = (result << 6) | (inBuf[i++] & 0x3F);
   1.121 +  }
   1.122 +
   1.123 +  /* Check for overlong sequences, surrogates, and outside unicode range */
   1.124 +  if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
   1.125 +    return BAD_UTF8;
   1.126 +  }
   1.127 +
   1.128 +  *index = i;
   1.129 +  return result;
   1.130 +}
   1.131 +
   1.132 +PRBool
   1.133 +sec_port_ucs4_utf8_conversion_function
   1.134 +(
   1.135 +  PRBool toUnicode,
   1.136 +  unsigned char *inBuf,
   1.137 +  unsigned int inBufLen,
   1.138 +  unsigned char *outBuf,
   1.139 +  unsigned int maxOutBufLen,
   1.140 +  unsigned int *outBufLen
   1.141 +)
   1.142 +{
   1.143 +  PORT_Assert((unsigned int *)NULL != outBufLen);
   1.144 +
   1.145 +  if( toUnicode ) {
   1.146 +    unsigned int i, len = 0;
   1.147 +
   1.148 +    for( i = 0; i < inBufLen; ) {
   1.149 +      if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
   1.150 +      else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
   1.151 +      else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
   1.152 +      else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
   1.153 +      else return PR_FALSE;
   1.154 +
   1.155 +      len += 4;
   1.156 +    }
   1.157 +
   1.158 +    if( len > maxOutBufLen ) {
   1.159 +      *outBufLen = len;
   1.160 +      return PR_FALSE;
   1.161 +    }
   1.162 +
   1.163 +    len = 0;
   1.164 +
   1.165 +    for( i = 0; i < inBufLen; ) {
   1.166 +      PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
   1.167 +
   1.168 +      if (ucs4 == BAD_UTF8) return PR_FALSE;
   1.169 +           
   1.170 +      outBuf[len+L_0] = 0x00;
   1.171 +      outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
   1.172 +      outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
   1.173 +      outBuf[len+L_3] = (unsigned char)ucs4;
   1.174 +
   1.175 +      len += 4;
   1.176 +    }
   1.177 +
   1.178 +    *outBufLen = len;
   1.179 +    return PR_TRUE;
   1.180 +  } else {
   1.181 +    unsigned int i, len = 0;
   1.182 +    PORT_Assert((inBufLen % 4) == 0);
   1.183 +    if ((inBufLen % 4) != 0) {
   1.184 +      *outBufLen = 0;
   1.185 +      return PR_FALSE;
   1.186 +    }
   1.187 +
   1.188 +    for( i = 0; i < inBufLen; i += 4 ) {
   1.189 +      if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
   1.190 +	*outBufLen = 0;
   1.191 +	return PR_FALSE;
   1.192 +      } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
   1.193 +      else if( inBuf[i+L_2] >= 0x08 ) len += 3;
   1.194 +      else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
   1.195 +      else len += 1;
   1.196 +    }
   1.197 +
   1.198 +    if( len > maxOutBufLen ) {
   1.199 +      *outBufLen = len;
   1.200 +      return PR_FALSE;
   1.201 +    }
   1.202 +
   1.203 +    len = 0;
   1.204 +
   1.205 +    for( i = 0; i < inBufLen; i += 4 ) {
   1.206 +      if( inBuf[i+L_1] >= 0x01 ) {
   1.207 +        /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
   1.208 +        /* 00000000 000abcde fghijklm nopqrstu ->
   1.209 +           11110abc 10defghi 10jklmno 10pqrstu */
   1.210 +
   1.211 +        outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
   1.212 +        outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
   1.213 +                             | ((inBuf[i+L_2] & 0xF0) >> 4);
   1.214 +        outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
   1.215 +                             | ((inBuf[i+L_3] & 0xC0) >> 6);
   1.216 +        outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
   1.217 +
   1.218 +        len += 4;
   1.219 +      } else if( inBuf[i+L_2] >= 0x08 ) {
   1.220 +        /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
   1.221 +        /* 00000000 00000000 abcdefgh ijklmnop ->
   1.222 +           1110abcd 10efghij 10klmnop */
   1.223 +
   1.224 +        outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
   1.225 +        outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
   1.226 +                             | ((inBuf[i+L_3] & 0xC0) >> 6);
   1.227 +        outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
   1.228 +
   1.229 +        len += 3;
   1.230 +      } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
   1.231 +        /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
   1.232 +        /* 00000000 00000000 00000abc defghijk ->
   1.233 +           110abcde 10fghijk */
   1.234 +
   1.235 +        outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
   1.236 +                             | ((inBuf[i+L_3] & 0xC0) >> 6);
   1.237 +        outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
   1.238 +
   1.239 +        len += 2;
   1.240 +      } else {
   1.241 +        /* 0000 0000-0000 007F -> 0xxxxxx */
   1.242 +        /* 00000000 00000000 00000000 0abcdefg ->
   1.243 +           0abcdefg */
   1.244 +
   1.245 +        outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
   1.246 +
   1.247 +        len += 1;
   1.248 +      }
   1.249 +    }
   1.250 +                            
   1.251 +    *outBufLen = len;
   1.252 +    return PR_TRUE;
   1.253 +  }
   1.254 +}
   1.255 +
   1.256 +PRBool
   1.257 +sec_port_ucs2_utf8_conversion_function
   1.258 +(
   1.259 +  PRBool toUnicode,
   1.260 +  unsigned char *inBuf,
   1.261 +  unsigned int inBufLen,
   1.262 +  unsigned char *outBuf,
   1.263 +  unsigned int maxOutBufLen,
   1.264 +  unsigned int *outBufLen
   1.265 +)
   1.266 +{
   1.267 +  PORT_Assert((unsigned int *)NULL != outBufLen);
   1.268 +
   1.269 +  if( toUnicode ) {
   1.270 +    unsigned int i, len = 0;
   1.271 +
   1.272 +    for( i = 0; i < inBufLen; ) {
   1.273 +      if( (inBuf[i] & 0x80) == 0x00 ) {
   1.274 +        i += 1;
   1.275 +        len += 2;
   1.276 +      } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
   1.277 +        i += 2;
   1.278 +        len += 2;
   1.279 +      } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
   1.280 +        i += 3;
   1.281 +        len += 2;
   1.282 +      } else if( (inBuf[i] & 0xF8) == 0xF0 ) { 
   1.283 +        i += 4;
   1.284 +        len += 4;
   1.285 +      } else return PR_FALSE;
   1.286 +    }
   1.287 +
   1.288 +    if( len > maxOutBufLen ) {
   1.289 +      *outBufLen = len;
   1.290 +      return PR_FALSE;
   1.291 +    }
   1.292 +
   1.293 +    len = 0;
   1.294 +
   1.295 +    for( i = 0; i < inBufLen; ) {
   1.296 +      PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
   1.297 +
   1.298 +      if (ucs4 == BAD_UTF8) return PR_FALSE;
   1.299 +
   1.300 +      if( ucs4 < 0x10000) {
   1.301 +        outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
   1.302 +        outBuf[len+H_1] = (unsigned char)ucs4;
   1.303 +        len += 2;
   1.304 +      } else {
   1.305 +	ucs4 -= 0x10000;
   1.306 +        outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
   1.307 +        outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
   1.308 +        outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
   1.309 +        outBuf[len+2+H_1] = (unsigned char)ucs4;
   1.310 +	len += 4;
   1.311 +      }
   1.312 +    }
   1.313 +
   1.314 +    *outBufLen = len;
   1.315 +    return PR_TRUE;
   1.316 +  } else {
   1.317 +    unsigned int i, len = 0;
   1.318 +    PORT_Assert((inBufLen % 2) == 0);
   1.319 +    if ((inBufLen % 2) != 0) {
   1.320 +      *outBufLen = 0;
   1.321 +      return PR_FALSE;
   1.322 +    }
   1.323 +
   1.324 +    for( i = 0; i < inBufLen; i += 2 ) {
   1.325 +      if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
   1.326 +      else if( inBuf[i+H_0] < 0x08 ) len += 2;
   1.327 +      else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
   1.328 +        if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
   1.329 +          i += 2;
   1.330 +          len += 4;
   1.331 +        } else {
   1.332 +          return PR_FALSE;
   1.333 +        }
   1.334 +      }
   1.335 +      else len += 3;
   1.336 +    }
   1.337 +
   1.338 +    if( len > maxOutBufLen ) {
   1.339 +      *outBufLen = len;
   1.340 +      return PR_FALSE;
   1.341 +    }
   1.342 +
   1.343 +    len = 0;
   1.344 +
   1.345 +    for( i = 0; i < inBufLen; i += 2 ) {
   1.346 +      if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
   1.347 +        /* 0000-007F -> 0xxxxxx */
   1.348 +        /* 00000000 0abcdefg -> 0abcdefg */
   1.349 +
   1.350 +        outBuf[len] = inBuf[i+H_1] & 0x7F;
   1.351 +
   1.352 +        len += 1;
   1.353 +      } else if( inBuf[i+H_0] < 0x08 ) {
   1.354 +        /* 0080-07FF -> 110xxxxx 10xxxxxx */
   1.355 +        /* 00000abc defghijk -> 110abcde 10fghijk */
   1.356 +
   1.357 +        outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) 
   1.358 +                             | ((inBuf[i+H_1] & 0xC0) >> 6);
   1.359 +        outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
   1.360 +
   1.361 +        len += 2;
   1.362 +      } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
   1.363 +        int abcde, BCDE;
   1.364 +
   1.365 +        PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
   1.366 +
   1.367 +        /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
   1.368 +        /* 110110BC DEfghijk 110111lm nopqrstu ->
   1.369 +           { Let abcde = BCDE + 1 }
   1.370 +           11110abc 10defghi 10jklmno 10pqrstu */
   1.371 +
   1.372 +        BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
   1.373 +        abcde = BCDE + 1;
   1.374 +
   1.375 +        outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
   1.376 +        outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) 
   1.377 +                             | ((inBuf[i+0+H_1] & 0x3C) >> 2);
   1.378 +        outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
   1.379 +                             | ((inBuf[i+2+H_0] & 0x03) << 2)
   1.380 +                             | ((inBuf[i+2+H_1] & 0xC0) >> 6);
   1.381 +        outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
   1.382 +
   1.383 +        i += 2;
   1.384 +        len += 4;
   1.385 +      } else {
   1.386 +        /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
   1.387 +        /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
   1.388 +
   1.389 +        outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
   1.390 +        outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) 
   1.391 +                             | ((inBuf[i+H_1] & 0xC0) >> 6);
   1.392 +        outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
   1.393 +
   1.394 +        len += 3;
   1.395 +      }
   1.396 +    }
   1.397 +
   1.398 +    *outBufLen = len;
   1.399 +    return PR_TRUE;
   1.400 +  }
   1.401 +}
   1.402 +
   1.403 +PRBool
   1.404 +sec_port_iso88591_utf8_conversion_function
   1.405 +(
   1.406 +  const unsigned char *inBuf,
   1.407 +  unsigned int inBufLen,
   1.408 +  unsigned char *outBuf,
   1.409 +  unsigned int maxOutBufLen,
   1.410 +  unsigned int *outBufLen
   1.411 +)
   1.412 +{
   1.413 +  unsigned int i, len = 0;
   1.414 +
   1.415 +  PORT_Assert((unsigned int *)NULL != outBufLen);
   1.416 +
   1.417 +  for( i = 0; i < inBufLen; i++) {
   1.418 +    if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
   1.419 +    else len += 2;
   1.420 +  }
   1.421 +
   1.422 +  if( len > maxOutBufLen ) {
   1.423 +    *outBufLen = len;
   1.424 +    return PR_FALSE;
   1.425 +  }
   1.426 +
   1.427 +  len = 0;
   1.428 +
   1.429 +  for( i = 0; i < inBufLen; i++) {
   1.430 +    if( (inBuf[i] & 0x80) == 0x00 ) {
   1.431 +      /* 00-7F -> 0xxxxxxx */
   1.432 +      /* 0abcdefg -> 0abcdefg */
   1.433 +
   1.434 +      outBuf[len] = inBuf[i];
   1.435 +      len += 1;
   1.436 +    } else {
   1.437 +      /* 80-FF <- 110xxxxx 10xxxxxx */
   1.438 +      /* 00000000 abcdefgh -> 110000ab 10cdefgh */
   1.439 +
   1.440 +      outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
   1.441 +      outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
   1.442 +
   1.443 +      len += 2;
   1.444 +    }
   1.445 +  }
   1.446 +
   1.447 +  *outBufLen = len;
   1.448 +  return PR_TRUE;
   1.449 +}
   1.450 +
   1.451 +#ifdef TEST_UTF8
   1.452 +
   1.453 +#include <stdio.h>
   1.454 +#include <string.h>
   1.455 +#include <stdlib.h>
   1.456 +#include <netinet/in.h> /* for htonl and htons */
   1.457 +
/*
 * UCS-4 test vector: a 32-bit character value paired with a byte string.
 * In the ucs4[] table in this file, utf8 holds the UTF-8 encoding of c.
 */

struct ucs4 {
  PRUint32 c;   /* character value */
  char *utf8;   /* NUL-terminated UTF-8 byte sequence */
};
   1.466 +
/*
 * UCS-2 test vector: a 16-bit character value paired with a byte string.
 * In the ucs2[] table in this file, utf8 holds the UTF-8 encoding of c.
 */

struct ucs2 {
  PRUint16 c;   /* character value */
  char *utf8;   /* NUL-terminated UTF-8 byte sequence */
};
   1.475 +
/*
 * UTF-16 test vector: a 32-bit character value paired with two 16-bit
 * units.  In the utf16[] table in this file, w holds the surrogate pair
 * (high unit first) for c.
 */

struct utf16 {
  PRUint32 c;     /* character value */
  PRUint16 w[2];  /* two 16-bit code units */
};
   1.484 +
   1.485 +
/*
 * UCS-4 vectors
 *
 * Each entry pairs a character value with its UTF-8 encoding.  The table
 * is grouped by encoded length and ends at 0x10FFFF.  It is only compiled
 * when TEST_UTF8 is defined; presumably it is consumed by test code later
 * in the file.
 */

struct ucs4 ucs4[] = {
  /* One-byte encodings: 0x01-0x7F */
  { 0x00000001, "\x01" },
  { 0x00000002, "\x02" },
  { 0x00000003, "\x03" },
  { 0x00000004, "\x04" },
  { 0x00000007, "\x07" },
  { 0x00000008, "\x08" },
  { 0x0000000F, "\x0F" },
  { 0x00000010, "\x10" },
  { 0x0000001F, "\x1F" },
  { 0x00000020, "\x20" },
  { 0x0000003F, "\x3F" },
  { 0x00000040, "\x40" },
  { 0x0000007F, "\x7F" },

  /* Two-byte encodings: 0x80-0x7FF */
  { 0x00000080, "\xC2\x80" },
  { 0x00000081, "\xC2\x81" },
  { 0x00000082, "\xC2\x82" },
  { 0x00000084, "\xC2\x84" },
  { 0x00000088, "\xC2\x88" },
  { 0x00000090, "\xC2\x90" },
  { 0x000000A0, "\xC2\xA0" },
  { 0x000000C0, "\xC3\x80" },
  { 0x000000FF, "\xC3\xBF" },
  { 0x00000100, "\xC4\x80" },
  { 0x00000101, "\xC4\x81" },
  { 0x00000102, "\xC4\x82" },
  { 0x00000104, "\xC4\x84" },
  { 0x00000108, "\xC4\x88" },
  { 0x00000110, "\xC4\x90" },
  { 0x00000120, "\xC4\xA0" },
  { 0x00000140, "\xC5\x80" },
  { 0x00000180, "\xC6\x80" },
  { 0x000001FF, "\xC7\xBF" },
  { 0x00000200, "\xC8\x80" },
  { 0x00000201, "\xC8\x81" },
  { 0x00000202, "\xC8\x82" },
  { 0x00000204, "\xC8\x84" },
  { 0x00000208, "\xC8\x88" },
  { 0x00000210, "\xC8\x90" },
  { 0x00000220, "\xC8\xA0" },
  { 0x00000240, "\xC9\x80" },
  { 0x00000280, "\xCA\x80" },
  { 0x00000300, "\xCC\x80" },
  { 0x000003FF, "\xCF\xBF" },
  { 0x00000400, "\xD0\x80" },
  { 0x00000401, "\xD0\x81" },
  { 0x00000402, "\xD0\x82" },
  { 0x00000404, "\xD0\x84" },
  { 0x00000408, "\xD0\x88" },
  { 0x00000410, "\xD0\x90" },
  { 0x00000420, "\xD0\xA0" },
  { 0x00000440, "\xD1\x80" },
  { 0x00000480, "\xD2\x80" },
  { 0x00000500, "\xD4\x80" },
  { 0x00000600, "\xD8\x80" },
  { 0x000007FF, "\xDF\xBF" },

  /* Three-byte encodings: 0x800-0xFFFF */
  { 0x00000800, "\xE0\xA0\x80" },
  { 0x00000801, "\xE0\xA0\x81" },
  { 0x00000802, "\xE0\xA0\x82" },
  { 0x00000804, "\xE0\xA0\x84" },
  { 0x00000808, "\xE0\xA0\x88" },
  { 0x00000810, "\xE0\xA0\x90" },
  { 0x00000820, "\xE0\xA0\xA0" },
  { 0x00000840, "\xE0\xA1\x80" },
  { 0x00000880, "\xE0\xA2\x80" },
  { 0x00000900, "\xE0\xA4\x80" },
  { 0x00000A00, "\xE0\xA8\x80" },
  { 0x00000C00, "\xE0\xB0\x80" },
  { 0x00000FFF, "\xE0\xBF\xBF" },
  { 0x00001000, "\xE1\x80\x80" },
  { 0x00001001, "\xE1\x80\x81" },
  { 0x00001002, "\xE1\x80\x82" },
  { 0x00001004, "\xE1\x80\x84" },
  { 0x00001008, "\xE1\x80\x88" },
  { 0x00001010, "\xE1\x80\x90" },
  { 0x00001020, "\xE1\x80\xA0" },
  { 0x00001040, "\xE1\x81\x80" },
  { 0x00001080, "\xE1\x82\x80" },
  { 0x00001100, "\xE1\x84\x80" },
  { 0x00001200, "\xE1\x88\x80" },
  { 0x00001400, "\xE1\x90\x80" },
  { 0x00001800, "\xE1\xA0\x80" },
  { 0x00001FFF, "\xE1\xBF\xBF" },
  { 0x00002000, "\xE2\x80\x80" },
  { 0x00002001, "\xE2\x80\x81" },
  { 0x00002002, "\xE2\x80\x82" },
  { 0x00002004, "\xE2\x80\x84" },
  { 0x00002008, "\xE2\x80\x88" },
  { 0x00002010, "\xE2\x80\x90" },
  { 0x00002020, "\xE2\x80\xA0" },
  { 0x00002040, "\xE2\x81\x80" },
  { 0x00002080, "\xE2\x82\x80" },
  { 0x00002100, "\xE2\x84\x80" },
  { 0x00002200, "\xE2\x88\x80" },
  { 0x00002400, "\xE2\x90\x80" },
  { 0x00002800, "\xE2\xA0\x80" },
  { 0x00003000, "\xE3\x80\x80" },
  { 0x00003FFF, "\xE3\xBF\xBF" },
  { 0x00004000, "\xE4\x80\x80" },
  { 0x00004001, "\xE4\x80\x81" },
  { 0x00004002, "\xE4\x80\x82" },
  { 0x00004004, "\xE4\x80\x84" },
  { 0x00004008, "\xE4\x80\x88" },
  { 0x00004010, "\xE4\x80\x90" },
  { 0x00004020, "\xE4\x80\xA0" },
  { 0x00004040, "\xE4\x81\x80" },
  { 0x00004080, "\xE4\x82\x80" },
  { 0x00004100, "\xE4\x84\x80" },
  { 0x00004200, "\xE4\x88\x80" },
  { 0x00004400, "\xE4\x90\x80" },
  { 0x00004800, "\xE4\xA0\x80" },
  { 0x00005000, "\xE5\x80\x80" },
  { 0x00006000, "\xE6\x80\x80" },
  { 0x00007FFF, "\xE7\xBF\xBF" },
  { 0x00008000, "\xE8\x80\x80" },
  { 0x00008001, "\xE8\x80\x81" },
  { 0x00008002, "\xE8\x80\x82" },
  { 0x00008004, "\xE8\x80\x84" },
  { 0x00008008, "\xE8\x80\x88" },
  { 0x00008010, "\xE8\x80\x90" },
  { 0x00008020, "\xE8\x80\xA0" },
  { 0x00008040, "\xE8\x81\x80" },
  { 0x00008080, "\xE8\x82\x80" },
  { 0x00008100, "\xE8\x84\x80" },
  { 0x00008200, "\xE8\x88\x80" },
  { 0x00008400, "\xE8\x90\x80" },
  { 0x00008800, "\xE8\xA0\x80" },
  { 0x00009000, "\xE9\x80\x80" },
  { 0x0000A000, "\xEA\x80\x80" },
  { 0x0000C000, "\xEC\x80\x80" },
  { 0x0000FFFF, "\xEF\xBF\xBF" },

  /* Four-byte encodings: 0x10000-0x10FFFF */
  { 0x00010000, "\xF0\x90\x80\x80" },
  { 0x00010001, "\xF0\x90\x80\x81" },
  { 0x00010002, "\xF0\x90\x80\x82" },
  { 0x00010004, "\xF0\x90\x80\x84" },
  { 0x00010008, "\xF0\x90\x80\x88" },
  { 0x00010010, "\xF0\x90\x80\x90" },
  { 0x00010020, "\xF0\x90\x80\xA0" },
  { 0x00010040, "\xF0\x90\x81\x80" },
  { 0x00010080, "\xF0\x90\x82\x80" },
  { 0x00010100, "\xF0\x90\x84\x80" },
  { 0x00010200, "\xF0\x90\x88\x80" },
  { 0x00010400, "\xF0\x90\x90\x80" },
  { 0x00010800, "\xF0\x90\xA0\x80" },
  { 0x00011000, "\xF0\x91\x80\x80" },
  { 0x00012000, "\xF0\x92\x80\x80" },
  { 0x00014000, "\xF0\x94\x80\x80" },
  { 0x00018000, "\xF0\x98\x80\x80" },
  { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
  { 0x00020000, "\xF0\xA0\x80\x80" },
  { 0x00020001, "\xF0\xA0\x80\x81" },
  { 0x00020002, "\xF0\xA0\x80\x82" },
  { 0x00020004, "\xF0\xA0\x80\x84" },
  { 0x00020008, "\xF0\xA0\x80\x88" },
  { 0x00020010, "\xF0\xA0\x80\x90" },
  { 0x00020020, "\xF0\xA0\x80\xA0" },
  { 0x00020040, "\xF0\xA0\x81\x80" },
  { 0x00020080, "\xF0\xA0\x82\x80" },
  { 0x00020100, "\xF0\xA0\x84\x80" },
  { 0x00020200, "\xF0\xA0\x88\x80" },
  { 0x00020400, "\xF0\xA0\x90\x80" },
  { 0x00020800, "\xF0\xA0\xA0\x80" },
  { 0x00021000, "\xF0\xA1\x80\x80" },
  { 0x00022000, "\xF0\xA2\x80\x80" },
  { 0x00024000, "\xF0\xA4\x80\x80" },
  { 0x00028000, "\xF0\xA8\x80\x80" },
  { 0x00030000, "\xF0\xB0\x80\x80" },
  { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
  { 0x00040000, "\xF1\x80\x80\x80" },
  { 0x00040001, "\xF1\x80\x80\x81" },
  { 0x00040002, "\xF1\x80\x80\x82" },
  { 0x00040004, "\xF1\x80\x80\x84" },
  { 0x00040008, "\xF1\x80\x80\x88" },
  { 0x00040010, "\xF1\x80\x80\x90" },
  { 0x00040020, "\xF1\x80\x80\xA0" },
  { 0x00040040, "\xF1\x80\x81\x80" },
  { 0x00040080, "\xF1\x80\x82\x80" },
  { 0x00040100, "\xF1\x80\x84\x80" },
  { 0x00040200, "\xF1\x80\x88\x80" },
  { 0x00040400, "\xF1\x80\x90\x80" },
  { 0x00040800, "\xF1\x80\xA0\x80" },
  { 0x00041000, "\xF1\x81\x80\x80" },
  { 0x00042000, "\xF1\x82\x80\x80" },
  { 0x00044000, "\xF1\x84\x80\x80" },
  { 0x00048000, "\xF1\x88\x80\x80" },
  { 0x00050000, "\xF1\x90\x80\x80" },
  { 0x00060000, "\xF1\xA0\x80\x80" },
  { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
  { 0x00080000, "\xF2\x80\x80\x80" },
  { 0x00080001, "\xF2\x80\x80\x81" },
  { 0x00080002, "\xF2\x80\x80\x82" },
  { 0x00080004, "\xF2\x80\x80\x84" },
  { 0x00080008, "\xF2\x80\x80\x88" },
  { 0x00080010, "\xF2\x80\x80\x90" },
  { 0x00080020, "\xF2\x80\x80\xA0" },
  { 0x00080040, "\xF2\x80\x81\x80" },
  { 0x00080080, "\xF2\x80\x82\x80" },
  { 0x00080100, "\xF2\x80\x84\x80" },
  { 0x00080200, "\xF2\x80\x88\x80" },
  { 0x00080400, "\xF2\x80\x90\x80" },
  { 0x00080800, "\xF2\x80\xA0\x80" },
  { 0x00081000, "\xF2\x81\x80\x80" },
  { 0x00082000, "\xF2\x82\x80\x80" },
  { 0x00084000, "\xF2\x84\x80\x80" },
  { 0x00088000, "\xF2\x88\x80\x80" },
  { 0x00090000, "\xF2\x90\x80\x80" },
  { 0x000A0000, "\xF2\xA0\x80\x80" },
  { 0x000C0000, "\xF3\x80\x80\x80" },
  { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
  { 0x00100000, "\xF4\x80\x80\x80" },
  { 0x00100001, "\xF4\x80\x80\x81" },
  { 0x00100002, "\xF4\x80\x80\x82" },
  { 0x00100004, "\xF4\x80\x80\x84" },
  { 0x00100008, "\xF4\x80\x80\x88" },
  { 0x00100010, "\xF4\x80\x80\x90" },
  { 0x00100020, "\xF4\x80\x80\xA0" },
  { 0x00100040, "\xF4\x80\x81\x80" },
  { 0x00100080, "\xF4\x80\x82\x80" },
  { 0x00100100, "\xF4\x80\x84\x80" },
  { 0x00100200, "\xF4\x80\x88\x80" },
  { 0x00100400, "\xF4\x80\x90\x80" },
  { 0x00100800, "\xF4\x80\xA0\x80" },
  { 0x00101000, "\xF4\x81\x80\x80" },
  { 0x00102000, "\xF4\x82\x80\x80" },
  { 0x00104000, "\xF4\x84\x80\x80" },
  { 0x00108000, "\xF4\x88\x80\x80" },
  { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
};
   1.721 +
/*
 * UCS-2 vectors
 *
 * Each entry pairs a 16-bit character value with its UTF-8 encoding.  The
 * table is grouped by encoded length and contains no surrogate values.  It
 * is only compiled when TEST_UTF8 is defined; presumably it is consumed by
 * test code later in the file.
 */

struct ucs2 ucs2[] = {
  /* One-byte encodings: 0x01-0x7F */
  { 0x0001, "\x01" },
  { 0x0002, "\x02" },
  { 0x0003, "\x03" },
  { 0x0004, "\x04" },
  { 0x0007, "\x07" },
  { 0x0008, "\x08" },
  { 0x000F, "\x0F" },
  { 0x0010, "\x10" },
  { 0x001F, "\x1F" },
  { 0x0020, "\x20" },
  { 0x003F, "\x3F" },
  { 0x0040, "\x40" },
  { 0x007F, "\x7F" },

  /* Two-byte encodings: 0x80-0x7FF */
  { 0x0080, "\xC2\x80" },
  { 0x0081, "\xC2\x81" },
  { 0x0082, "\xC2\x82" },
  { 0x0084, "\xC2\x84" },
  { 0x0088, "\xC2\x88" },
  { 0x0090, "\xC2\x90" },
  { 0x00A0, "\xC2\xA0" },
  { 0x00C0, "\xC3\x80" },
  { 0x00FF, "\xC3\xBF" },
  { 0x0100, "\xC4\x80" },
  { 0x0101, "\xC4\x81" },
  { 0x0102, "\xC4\x82" },
  { 0x0104, "\xC4\x84" },
  { 0x0108, "\xC4\x88" },
  { 0x0110, "\xC4\x90" },
  { 0x0120, "\xC4\xA0" },
  { 0x0140, "\xC5\x80" },
  { 0x0180, "\xC6\x80" },
  { 0x01FF, "\xC7\xBF" },
  { 0x0200, "\xC8\x80" },
  { 0x0201, "\xC8\x81" },
  { 0x0202, "\xC8\x82" },
  { 0x0204, "\xC8\x84" },
  { 0x0208, "\xC8\x88" },
  { 0x0210, "\xC8\x90" },
  { 0x0220, "\xC8\xA0" },
  { 0x0240, "\xC9\x80" },
  { 0x0280, "\xCA\x80" },
  { 0x0300, "\xCC\x80" },
  { 0x03FF, "\xCF\xBF" },
  { 0x0400, "\xD0\x80" },
  { 0x0401, "\xD0\x81" },
  { 0x0402, "\xD0\x82" },
  { 0x0404, "\xD0\x84" },
  { 0x0408, "\xD0\x88" },
  { 0x0410, "\xD0\x90" },
  { 0x0420, "\xD0\xA0" },
  { 0x0440, "\xD1\x80" },
  { 0x0480, "\xD2\x80" },
  { 0x0500, "\xD4\x80" },
  { 0x0600, "\xD8\x80" },
  { 0x07FF, "\xDF\xBF" },

  /* Three-byte encodings: 0x800-0xFFFF */
  { 0x0800, "\xE0\xA0\x80" },
  { 0x0801, "\xE0\xA0\x81" },
  { 0x0802, "\xE0\xA0\x82" },
  { 0x0804, "\xE0\xA0\x84" },
  { 0x0808, "\xE0\xA0\x88" },
  { 0x0810, "\xE0\xA0\x90" },
  { 0x0820, "\xE0\xA0\xA0" },
  { 0x0840, "\xE0\xA1\x80" },
  { 0x0880, "\xE0\xA2\x80" },
  { 0x0900, "\xE0\xA4\x80" },
  { 0x0A00, "\xE0\xA8\x80" },
  { 0x0C00, "\xE0\xB0\x80" },
  { 0x0FFF, "\xE0\xBF\xBF" },
  { 0x1000, "\xE1\x80\x80" },
  { 0x1001, "\xE1\x80\x81" },
  { 0x1002, "\xE1\x80\x82" },
  { 0x1004, "\xE1\x80\x84" },
  { 0x1008, "\xE1\x80\x88" },
  { 0x1010, "\xE1\x80\x90" },
  { 0x1020, "\xE1\x80\xA0" },
  { 0x1040, "\xE1\x81\x80" },
  { 0x1080, "\xE1\x82\x80" },
  { 0x1100, "\xE1\x84\x80" },
  { 0x1200, "\xE1\x88\x80" },
  { 0x1400, "\xE1\x90\x80" },
  { 0x1800, "\xE1\xA0\x80" },
  { 0x1FFF, "\xE1\xBF\xBF" },
  { 0x2000, "\xE2\x80\x80" },
  { 0x2001, "\xE2\x80\x81" },
  { 0x2002, "\xE2\x80\x82" },
  { 0x2004, "\xE2\x80\x84" },
  { 0x2008, "\xE2\x80\x88" },
  { 0x2010, "\xE2\x80\x90" },
  { 0x2020, "\xE2\x80\xA0" },
  { 0x2040, "\xE2\x81\x80" },
  { 0x2080, "\xE2\x82\x80" },
  { 0x2100, "\xE2\x84\x80" },
  { 0x2200, "\xE2\x88\x80" },
  { 0x2400, "\xE2\x90\x80" },
  { 0x2800, "\xE2\xA0\x80" },
  { 0x3000, "\xE3\x80\x80" },
  { 0x3FFF, "\xE3\xBF\xBF" },
  { 0x4000, "\xE4\x80\x80" },
  { 0x4001, "\xE4\x80\x81" },
  { 0x4002, "\xE4\x80\x82" },
  { 0x4004, "\xE4\x80\x84" },
  { 0x4008, "\xE4\x80\x88" },
  { 0x4010, "\xE4\x80\x90" },
  { 0x4020, "\xE4\x80\xA0" },
  { 0x4040, "\xE4\x81\x80" },
  { 0x4080, "\xE4\x82\x80" },
  { 0x4100, "\xE4\x84\x80" },
  { 0x4200, "\xE4\x88\x80" },
  { 0x4400, "\xE4\x90\x80" },
  { 0x4800, "\xE4\xA0\x80" },
  { 0x5000, "\xE5\x80\x80" },
  { 0x6000, "\xE6\x80\x80" },
  { 0x7FFF, "\xE7\xBF\xBF" },
  { 0x8000, "\xE8\x80\x80" },
  { 0x8001, "\xE8\x80\x81" },
  { 0x8002, "\xE8\x80\x82" },
  { 0x8004, "\xE8\x80\x84" },
  { 0x8008, "\xE8\x80\x88" },
  { 0x8010, "\xE8\x80\x90" },
  { 0x8020, "\xE8\x80\xA0" },
  { 0x8040, "\xE8\x81\x80" },
  { 0x8080, "\xE8\x82\x80" },
  { 0x8100, "\xE8\x84\x80" },
  { 0x8200, "\xE8\x88\x80" },
  { 0x8400, "\xE8\x90\x80" },
  { 0x8800, "\xE8\xA0\x80" },
  { 0x9000, "\xE9\x80\x80" },
  { 0xA000, "\xEA\x80\x80" },
  { 0xC000, "\xEC\x80\x80" },
  { 0xFFFF, "\xEF\xBF\xBF" }

};
   1.861 +
   1.862 +/*
   1.863 + * UTF-16 vectors: each entry pairs a code point c above 0xFFFF with the
   1.864 + * surrogate pair { w[0], w[1] } expected for it.  With U' = c - 0x10000,
   1.865 + * w[0] = 0xD800 | (U' >> 10) and w[1] = 0xDC00 | (U' & 0x3FF). */
   1.866 +struct utf16 utf16[] = {
   1.867 +  { 0x00010000, { 0xD800, 0xDC00 } },  /* smallest value that needs a surrogate pair */
   1.868 +  { 0x00010001, { 0xD800, 0xDC01 } },
   1.869 +  { 0x00010002, { 0xD800, 0xDC02 } },
   1.870 +  { 0x00010003, { 0xD800, 0xDC03 } },
   1.871 +  { 0x00010004, { 0xD800, 0xDC04 } },
   1.872 +  { 0x00010007, { 0xD800, 0xDC07 } },
   1.873 +  { 0x00010008, { 0xD800, 0xDC08 } },
   1.874 +  { 0x0001000F, { 0xD800, 0xDC0F } },
   1.875 +  { 0x00010010, { 0xD800, 0xDC10 } },
   1.876 +  { 0x0001001F, { 0xD800, 0xDC1F } },
   1.877 +  { 0x00010020, { 0xD800, 0xDC20 } },
   1.878 +  { 0x0001003F, { 0xD800, 0xDC3F } },
   1.879 +  { 0x00010040, { 0xD800, 0xDC40 } },
   1.880 +  { 0x0001007F, { 0xD800, 0xDC7F } },
   1.881 +  { 0x00010080, { 0xD800, 0xDC80 } },
   1.882 +  { 0x00010081, { 0xD800, 0xDC81 } },
   1.883 +  { 0x00010082, { 0xD800, 0xDC82 } },
   1.884 +  { 0x00010084, { 0xD800, 0xDC84 } },
   1.885 +  { 0x00010088, { 0xD800, 0xDC88 } },
   1.886 +  { 0x00010090, { 0xD800, 0xDC90 } },
   1.887 +  { 0x000100A0, { 0xD800, 0xDCA0 } },
   1.888 +  { 0x000100C0, { 0xD800, 0xDCC0 } },
   1.889 +  { 0x000100FF, { 0xD800, 0xDCFF } },
   1.890 +  { 0x00010100, { 0xD800, 0xDD00 } },
   1.891 +  { 0x00010101, { 0xD800, 0xDD01 } },
   1.892 +  { 0x00010102, { 0xD800, 0xDD02 } },
   1.893 +  { 0x00010104, { 0xD800, 0xDD04 } },
   1.894 +  { 0x00010108, { 0xD800, 0xDD08 } },
   1.895 +  { 0x00010110, { 0xD800, 0xDD10 } },
   1.896 +  { 0x00010120, { 0xD800, 0xDD20 } },
   1.897 +  { 0x00010140, { 0xD800, 0xDD40 } },
   1.898 +  { 0x00010180, { 0xD800, 0xDD80 } },
   1.899 +  { 0x000101FF, { 0xD800, 0xDDFF } },
   1.900 +  { 0x00010200, { 0xD800, 0xDE00 } },
   1.901 +  { 0x00010201, { 0xD800, 0xDE01 } },
   1.902 +  { 0x00010202, { 0xD800, 0xDE02 } },
   1.903 +  { 0x00010204, { 0xD800, 0xDE04 } },
   1.904 +  { 0x00010208, { 0xD800, 0xDE08 } },
   1.905 +  { 0x00010210, { 0xD800, 0xDE10 } },
   1.906 +  { 0x00010220, { 0xD800, 0xDE20 } },
   1.907 +  { 0x00010240, { 0xD800, 0xDE40 } },
   1.908 +  { 0x00010280, { 0xD800, 0xDE80 } },
   1.909 +  { 0x00010300, { 0xD800, 0xDF00 } },
   1.910 +  { 0x000103FF, { 0xD800, 0xDFFF } },
   1.911 +  { 0x00010400, { 0xD801, 0xDC00 } },
   1.912 +  { 0x00010401, { 0xD801, 0xDC01 } },
   1.913 +  { 0x00010402, { 0xD801, 0xDC02 } },
   1.914 +  { 0x00010404, { 0xD801, 0xDC04 } },
   1.915 +  { 0x00010408, { 0xD801, 0xDC08 } },
   1.916 +  { 0x00010410, { 0xD801, 0xDC10 } },
   1.917 +  { 0x00010420, { 0xD801, 0xDC20 } },
   1.918 +  { 0x00010440, { 0xD801, 0xDC40 } },
   1.919 +  { 0x00010480, { 0xD801, 0xDC80 } },
   1.920 +  { 0x00010500, { 0xD801, 0xDD00 } },
   1.921 +  { 0x00010600, { 0xD801, 0xDE00 } },
   1.922 +  { 0x000107FF, { 0xD801, 0xDFFF } },
   1.923 +  { 0x00010800, { 0xD802, 0xDC00 } },
   1.924 +  { 0x00010801, { 0xD802, 0xDC01 } },
   1.925 +  { 0x00010802, { 0xD802, 0xDC02 } },
   1.926 +  { 0x00010804, { 0xD802, 0xDC04 } },
   1.927 +  { 0x00010808, { 0xD802, 0xDC08 } },
   1.928 +  { 0x00010810, { 0xD802, 0xDC10 } },
   1.929 +  { 0x00010820, { 0xD802, 0xDC20 } },
   1.930 +  { 0x00010840, { 0xD802, 0xDC40 } },
   1.931 +  { 0x00010880, { 0xD802, 0xDC80 } },
   1.932 +  { 0x00010900, { 0xD802, 0xDD00 } },
   1.933 +  { 0x00010A00, { 0xD802, 0xDE00 } },
   1.934 +  { 0x00010C00, { 0xD803, 0xDC00 } },
   1.935 +  { 0x00010FFF, { 0xD803, 0xDFFF } },
   1.936 +  { 0x00011000, { 0xD804, 0xDC00 } },
   1.937 +  { 0x00011001, { 0xD804, 0xDC01 } },
   1.938 +  { 0x00011002, { 0xD804, 0xDC02 } },
   1.939 +  { 0x00011004, { 0xD804, 0xDC04 } },
   1.940 +  { 0x00011008, { 0xD804, 0xDC08 } },
   1.941 +  { 0x00011010, { 0xD804, 0xDC10 } },
   1.942 +  { 0x00011020, { 0xD804, 0xDC20 } },
   1.943 +  { 0x00011040, { 0xD804, 0xDC40 } },
   1.944 +  { 0x00011080, { 0xD804, 0xDC80 } },
   1.945 +  { 0x00011100, { 0xD804, 0xDD00 } },
   1.946 +  { 0x00011200, { 0xD804, 0xDE00 } },
   1.947 +  { 0x00011400, { 0xD805, 0xDC00 } },
   1.948 +  { 0x00011800, { 0xD806, 0xDC00 } },
   1.949 +  { 0x00011FFF, { 0xD807, 0xDFFF } },
   1.950 +  { 0x00012000, { 0xD808, 0xDC00 } },
   1.951 +  { 0x00012001, { 0xD808, 0xDC01 } },
   1.952 +  { 0x00012002, { 0xD808, 0xDC02 } },
   1.953 +  { 0x00012004, { 0xD808, 0xDC04 } },
   1.954 +  { 0x00012008, { 0xD808, 0xDC08 } },
   1.955 +  { 0x00012010, { 0xD808, 0xDC10 } },
   1.956 +  { 0x00012020, { 0xD808, 0xDC20 } },
   1.957 +  { 0x00012040, { 0xD808, 0xDC40 } },
   1.958 +  { 0x00012080, { 0xD808, 0xDC80 } },
   1.959 +  { 0x00012100, { 0xD808, 0xDD00 } },
   1.960 +  { 0x00012200, { 0xD808, 0xDE00 } },
   1.961 +  { 0x00012400, { 0xD809, 0xDC00 } },
   1.962 +  { 0x00012800, { 0xD80A, 0xDC00 } },
   1.963 +  { 0x00013000, { 0xD80C, 0xDC00 } },
   1.964 +  { 0x00013FFF, { 0xD80F, 0xDFFF } },
   1.965 +  { 0x00014000, { 0xD810, 0xDC00 } },
   1.966 +  { 0x00014001, { 0xD810, 0xDC01 } },
   1.967 +  { 0x00014002, { 0xD810, 0xDC02 } },
   1.968 +  { 0x00014004, { 0xD810, 0xDC04 } },
   1.969 +  { 0x00014008, { 0xD810, 0xDC08 } },
   1.970 +  { 0x00014010, { 0xD810, 0xDC10 } },
   1.971 +  { 0x00014020, { 0xD810, 0xDC20 } },
   1.972 +  { 0x00014040, { 0xD810, 0xDC40 } },
   1.973 +  { 0x00014080, { 0xD810, 0xDC80 } },
   1.974 +  { 0x00014100, { 0xD810, 0xDD00 } },
   1.975 +  { 0x00014200, { 0xD810, 0xDE00 } },
   1.976 +  { 0x00014400, { 0xD811, 0xDC00 } },
   1.977 +  { 0x00014800, { 0xD812, 0xDC00 } },
   1.978 +  { 0x00015000, { 0xD814, 0xDC00 } },
   1.979 +  { 0x00016000, { 0xD818, 0xDC00 } },
   1.980 +  { 0x00017FFF, { 0xD81F, 0xDFFF } },
   1.981 +  { 0x00018000, { 0xD820, 0xDC00 } },
   1.982 +  { 0x00018001, { 0xD820, 0xDC01 } },
   1.983 +  { 0x00018002, { 0xD820, 0xDC02 } },
   1.984 +  { 0x00018004, { 0xD820, 0xDC04 } },
   1.985 +  { 0x00018008, { 0xD820, 0xDC08 } },
   1.986 +  { 0x00018010, { 0xD820, 0xDC10 } },
   1.987 +  { 0x00018020, { 0xD820, 0xDC20 } },
   1.988 +  { 0x00018040, { 0xD820, 0xDC40 } },
   1.989 +  { 0x00018080, { 0xD820, 0xDC80 } },
   1.990 +  { 0x00018100, { 0xD820, 0xDD00 } },
   1.991 +  { 0x00018200, { 0xD820, 0xDE00 } },
   1.992 +  { 0x00018400, { 0xD821, 0xDC00 } },
   1.993 +  { 0x00018800, { 0xD822, 0xDC00 } },
   1.994 +  { 0x00019000, { 0xD824, 0xDC00 } },
   1.995 +  { 0x0001A000, { 0xD828, 0xDC00 } },
   1.996 +  { 0x0001C000, { 0xD830, 0xDC00 } },
   1.997 +  { 0x0001FFFF, { 0xD83F, 0xDFFF } },
   1.998 +  { 0x00020000, { 0xD840, 0xDC00 } },
   1.999 +  { 0x00020001, { 0xD840, 0xDC01 } },
  1.1000 +  { 0x00020002, { 0xD840, 0xDC02 } },
  1.1001 +  { 0x00020004, { 0xD840, 0xDC04 } },
  1.1002 +  { 0x00020008, { 0xD840, 0xDC08 } },
  1.1003 +  { 0x00020010, { 0xD840, 0xDC10 } },
  1.1004 +  { 0x00020020, { 0xD840, 0xDC20 } },
  1.1005 +  { 0x00020040, { 0xD840, 0xDC40 } },
  1.1006 +  { 0x00020080, { 0xD840, 0xDC80 } },
  1.1007 +  { 0x00020100, { 0xD840, 0xDD00 } },
  1.1008 +  { 0x00020200, { 0xD840, 0xDE00 } },
  1.1009 +  { 0x00020400, { 0xD841, 0xDC00 } },
  1.1010 +  { 0x00020800, { 0xD842, 0xDC00 } },
  1.1011 +  { 0x00021000, { 0xD844, 0xDC00 } },
  1.1012 +  { 0x00022000, { 0xD848, 0xDC00 } },
  1.1013 +  { 0x00024000, { 0xD850, 0xDC00 } },
  1.1014 +  { 0x00028000, { 0xD860, 0xDC00 } },
  1.1015 +  { 0x0002FFFF, { 0xD87F, 0xDFFF } },
  1.1016 +  { 0x00030000, { 0xD880, 0xDC00 } },
  1.1017 +  { 0x00030001, { 0xD880, 0xDC01 } },
  1.1018 +  { 0x00030002, { 0xD880, 0xDC02 } },
  1.1019 +  { 0x00030004, { 0xD880, 0xDC04 } },
  1.1020 +  { 0x00030008, { 0xD880, 0xDC08 } },
  1.1021 +  { 0x00030010, { 0xD880, 0xDC10 } },
  1.1022 +  { 0x00030020, { 0xD880, 0xDC20 } },
  1.1023 +  { 0x00030040, { 0xD880, 0xDC40 } },
  1.1024 +  { 0x00030080, { 0xD880, 0xDC80 } },
  1.1025 +  { 0x00030100, { 0xD880, 0xDD00 } },
  1.1026 +  { 0x00030200, { 0xD880, 0xDE00 } },
  1.1027 +  { 0x00030400, { 0xD881, 0xDC00 } },
  1.1028 +  { 0x00030800, { 0xD882, 0xDC00 } },
  1.1029 +  { 0x00031000, { 0xD884, 0xDC00 } },
  1.1030 +  { 0x00032000, { 0xD888, 0xDC00 } },
  1.1031 +  { 0x00034000, { 0xD890, 0xDC00 } },
  1.1032 +  { 0x00038000, { 0xD8A0, 0xDC00 } },
  1.1033 +  { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
  1.1034 +  { 0x00040000, { 0xD8C0, 0xDC00 } },
  1.1035 +  { 0x00040001, { 0xD8C0, 0xDC01 } },
  1.1036 +  { 0x00040002, { 0xD8C0, 0xDC02 } },
  1.1037 +  { 0x00040004, { 0xD8C0, 0xDC04 } },
  1.1038 +  { 0x00040008, { 0xD8C0, 0xDC08 } },
  1.1039 +  { 0x00040010, { 0xD8C0, 0xDC10 } },
  1.1040 +  { 0x00040020, { 0xD8C0, 0xDC20 } },
  1.1041 +  { 0x00040040, { 0xD8C0, 0xDC40 } },
  1.1042 +  { 0x00040080, { 0xD8C0, 0xDC80 } },
  1.1043 +  { 0x00040100, { 0xD8C0, 0xDD00 } },
  1.1044 +  { 0x00040200, { 0xD8C0, 0xDE00 } },
  1.1045 +  { 0x00040400, { 0xD8C1, 0xDC00 } },
  1.1046 +  { 0x00040800, { 0xD8C2, 0xDC00 } },
  1.1047 +  { 0x00041000, { 0xD8C4, 0xDC00 } },
  1.1048 +  { 0x00042000, { 0xD8C8, 0xDC00 } },
  1.1049 +  { 0x00044000, { 0xD8D0, 0xDC00 } },
  1.1050 +  { 0x00048000, { 0xD8E0, 0xDC00 } },
  1.1051 +  { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
  1.1052 +  { 0x00050000, { 0xD900, 0xDC00 } },
  1.1053 +  { 0x00050001, { 0xD900, 0xDC01 } },
  1.1054 +  { 0x00050002, { 0xD900, 0xDC02 } },
  1.1055 +  { 0x00050004, { 0xD900, 0xDC04 } },
  1.1056 +  { 0x00050008, { 0xD900, 0xDC08 } },
  1.1057 +  { 0x00050010, { 0xD900, 0xDC10 } },
  1.1058 +  { 0x00050020, { 0xD900, 0xDC20 } },
  1.1059 +  { 0x00050040, { 0xD900, 0xDC40 } },
  1.1060 +  { 0x00050080, { 0xD900, 0xDC80 } },
  1.1061 +  { 0x00050100, { 0xD900, 0xDD00 } },
  1.1062 +  { 0x00050200, { 0xD900, 0xDE00 } },
  1.1063 +  { 0x00050400, { 0xD901, 0xDC00 } },
  1.1064 +  { 0x00050800, { 0xD902, 0xDC00 } },
  1.1065 +  { 0x00051000, { 0xD904, 0xDC00 } },
  1.1066 +  { 0x00052000, { 0xD908, 0xDC00 } },
  1.1067 +  { 0x00054000, { 0xD910, 0xDC00 } },
  1.1068 +  { 0x00058000, { 0xD920, 0xDC00 } },
  1.1069 +  { 0x00060000, { 0xD940, 0xDC00 } },
  1.1070 +  { 0x00070000, { 0xD980, 0xDC00 } },
  1.1071 +  { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
  1.1072 +  { 0x00080000, { 0xD9C0, 0xDC00 } },
  1.1073 +  { 0x00080001, { 0xD9C0, 0xDC01 } },
  1.1074 +  { 0x00080002, { 0xD9C0, 0xDC02 } },
  1.1075 +  { 0x00080004, { 0xD9C0, 0xDC04 } },
  1.1076 +  { 0x00080008, { 0xD9C0, 0xDC08 } },
  1.1077 +  { 0x00080010, { 0xD9C0, 0xDC10 } },
  1.1078 +  { 0x00080020, { 0xD9C0, 0xDC20 } },
  1.1079 +  { 0x00080040, { 0xD9C0, 0xDC40 } },
  1.1080 +  { 0x00080080, { 0xD9C0, 0xDC80 } },
  1.1081 +  { 0x00080100, { 0xD9C0, 0xDD00 } },
  1.1082 +  { 0x00080200, { 0xD9C0, 0xDE00 } },
  1.1083 +  { 0x00080400, { 0xD9C1, 0xDC00 } },
  1.1084 +  { 0x00080800, { 0xD9C2, 0xDC00 } },
  1.1085 +  { 0x00081000, { 0xD9C4, 0xDC00 } },
  1.1086 +  { 0x00082000, { 0xD9C8, 0xDC00 } },
  1.1087 +  { 0x00084000, { 0xD9D0, 0xDC00 } },
  1.1088 +  { 0x00088000, { 0xD9E0, 0xDC00 } },
  1.1089 +  { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
  1.1090 +  { 0x00090000, { 0xDA00, 0xDC00 } },
  1.1091 +  { 0x00090001, { 0xDA00, 0xDC01 } },
  1.1092 +  { 0x00090002, { 0xDA00, 0xDC02 } },
  1.1093 +  { 0x00090004, { 0xDA00, 0xDC04 } },
  1.1094 +  { 0x00090008, { 0xDA00, 0xDC08 } },
  1.1095 +  { 0x00090010, { 0xDA00, 0xDC10 } },
  1.1096 +  { 0x00090020, { 0xDA00, 0xDC20 } },
  1.1097 +  { 0x00090040, { 0xDA00, 0xDC40 } },
  1.1098 +  { 0x00090080, { 0xDA00, 0xDC80 } },
  1.1099 +  { 0x00090100, { 0xDA00, 0xDD00 } },
  1.1100 +  { 0x00090200, { 0xDA00, 0xDE00 } },
  1.1101 +  { 0x00090400, { 0xDA01, 0xDC00 } },
  1.1102 +  { 0x00090800, { 0xDA02, 0xDC00 } },
  1.1103 +  { 0x00091000, { 0xDA04, 0xDC00 } },
  1.1104 +  { 0x00092000, { 0xDA08, 0xDC00 } },
  1.1105 +  { 0x00094000, { 0xDA10, 0xDC00 } },
  1.1106 +  { 0x00098000, { 0xDA20, 0xDC00 } },
  1.1107 +  { 0x000A0000, { 0xDA40, 0xDC00 } },
  1.1108 +  { 0x000B0000, { 0xDA80, 0xDC00 } },
  1.1109 +  { 0x000C0000, { 0xDAC0, 0xDC00 } },
  1.1110 +  { 0x000D0000, { 0xDB00, 0xDC00 } },
  1.1111 +  { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
  1.1112 +  { 0x0010FFFF, { 0xDBFF, 0xDFFF } }   /* largest value a surrogate pair can express */
  1.1113 +
  1.1114 +};
  1.1115 +
  1.1116 +/* illegal utf8 sequences: test_utf8_bad_chars() requires the UTF-8 decoders to reject each one */
  1.1117 +char *utf8_bad[] = {
  1.1118 +  "\xC0\x80",                  /* overlong: 2-byte form of 0x00 */
  1.1119 +  "\xC1\xBF",                  /* overlong: 2-byte form of 0x7F */
  1.1120 +  "\xE0\x80\x80",              /* overlong: 3-byte form of 0x0000 */
  1.1121 +  "\xE0\x9F\xBF",              /* overlong: 3-byte form of 0x07FF */
  1.1122 +  "\xF0\x80\x80\x80",          /* overlong: 4-byte form of 0x0000 */
  1.1123 +  "\xF0\x8F\xBF\xBF",          /* overlong: 4-byte form of 0xFFFF */
  1.1124 +  "\xF4\x90\x80\x80",          /* decodes to 0x110000, one past 0x10FFFF */
  1.1125 +  "\xF7\xBF\xBF\xBF",          /* decodes to 0x1FFFFF, the largest 4-byte value */
  1.1126 +  "\xF8\x80\x80\x80\x80",      /* 5-byte forms (lead bytes 0xF8-0xFB) start here */
  1.1127 +  "\xF8\x88\x80\x80\x80",
  1.1128 +  "\xF8\x92\x80\x80\x80",
  1.1129 +  "\xF8\x9F\xBF\xBF\xBF",
  1.1130 +  "\xF8\xA0\x80\x80\x80",
  1.1131 +  "\xF8\xA8\x80\x80\x80",
  1.1132 +  "\xF8\xB0\x80\x80\x80",
  1.1133 +  "\xF8\xBF\xBF\xBF\xBF",
  1.1134 +  "\xF9\x80\x80\x80\x88",
  1.1135 +  "\xF9\x84\x80\x80\x80",
  1.1136 +  "\xF9\xBF\xBF\xBF\xBF",
  1.1137 +  "\xFA\x80\x80\x80\x80",
  1.1138 +  "\xFA\x90\x80\x80\x80",
  1.1139 +  "\xFB\xBF\xBF\xBF\xBF",
  1.1140 +  "\xFC\x84\x80\x80\x80\x81",  /* 6-byte forms (lead bytes 0xFC-0xFD) start here */
  1.1141 +  "\xFC\x85\x80\x80\x80\x80",
  1.1142 +  "\xFC\x86\x80\x80\x80\x80",
  1.1143 +  "\xFC\x87\xBF\xBF\xBF\xBF",
  1.1144 +  "\xFC\x88\xA0\x80\x80\x80",
  1.1145 +  "\xFC\x89\x80\x80\x80\x80",
  1.1146 +  "\xFC\x8A\x80\x80\x80\x80",
  1.1147 +  "\xFC\x90\x80\x80\x80\x82",
  1.1148 +  "\xFD\x80\x80\x80\x80\x80",
  1.1149 +  "\xFD\xBF\xBF\xBF\xBF\xBF",
  1.1150 +  "\x80",                      /* continuation byte with no lead byte */
  1.1151 +  "\xC3",                      /* 2-byte lead byte with nothing after it */
  1.1152 +  "\xC3\xC3\x80",              /* lead byte followed by another lead byte */
  1.1153 +  "\xED\xA0\x80",              /* 3-byte form of surrogate value 0xD800 */
  1.1154 +  "\xED\xBF\x80",              /* 3-byte form of surrogate value 0xDFC0 */
  1.1155 +  "\xED\xBF\xBF",              /* 3-byte form of surrogate value 0xDFFF */
  1.1156 +  "\xED\xA0\x80\xE0\xBF\xBF",  /* surrogate value 0xD800 followed by 0x0FFF */
  1.1157 +};
  1.1158 +/* Print word, then each byte of the NUL-terminated string utf8 in hex, then end, all to stdout. */
  1.1159 +static void
  1.1160 +dump_utf8
  1.1161 +(
  1.1162 +  const char *word,          /* label printed before the bytes */
  1.1163 +  const unsigned char *utf8, /* bytes to dump; read up to, not including, the first 0x00 */
  1.1164 +  const char *end            /* text printed after the bytes (separator or newline) */
  1.1165 +)
  1.1166 +{
  1.1167 +  fprintf(stdout, "%s ", word);
  1.1168 +  for( ; *utf8; utf8++ ) {
  1.1169 +    fprintf(stdout, "%02.2x ", (unsigned int)*utf8); /* each byte is followed by one space */
  1.1170 +  }
  1.1171 +  fprintf(stdout, "%s", end);
  1.1172 +}
  1.1173 +/* Round-trip each ucs4[] vector: UCS-4 -> UTF-8 (compared with e->utf8) -> back to UCS-4. */
  1.1174 +static PRBool  /* PR_TRUE when every vector passes; each failure is reported on stdout */
  1.1175 +test_ucs4_chars
  1.1176 +(
  1.1177 +  void
  1.1178 +)
  1.1179 +{
  1.1180 +  PRBool rv = PR_TRUE;
  1.1181 +  unsigned int i; /* unsigned: it is compared against a sizeof-derived count */
  1.1182 +
  1.1183 +  for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1.1184 +    struct ucs4 *e = &ucs4[i];
  1.1185 +    PRBool result;
  1.1186 +    unsigned char utf8[8];
  1.1187 +    unsigned int len = 0;
  1.1188 +    PRUint32 back = 0;
  1.1189 +
  1.1190 +    (void)memset(utf8, 0, sizeof(utf8));
  1.1191 +    /* PR_FALSE asks for the UCS-4 -> UTF-8 direction; len receives the UTF-8 byte count */
  1.1192 +    result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
  1.1193 +      (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
  1.1194 +
  1.1195 +    if( !result ) {
  1.1196 +      fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
  1.1197 +      rv = PR_FALSE;
  1.1198 +      continue;
  1.1199 +    }
  1.1200 +    /* the bound test must stay first: it is what makes the utf8[len] store below safe */
  1.1201 +    if( (len >= sizeof(utf8)) ||
  1.1202 +        (strlen(e->utf8) != len) ||
  1.1203 +        (utf8[len] = '\0', 0 != strcmp(e->utf8, (char *)utf8)) ) {
  1.1204 +      fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
  1.1205 +      dump_utf8("expected", (unsigned char *)e->utf8, ", ");
  1.1206 +      dump_utf8("received", utf8, "\n");
  1.1207 +      rv = PR_FALSE;
  1.1208 +      continue;
  1.1209 +    }
  1.1210 +    /* PR_TRUE asks for UTF-8 -> UCS-4; len is passed in as input size and overwritten with output size */
  1.1211 +    result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1.1212 +      utf8, len, (unsigned char *)&back, sizeof(back), &len);
  1.1213 +
  1.1214 +    if( !result ) {
  1.1215 +      dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
  1.1216 +      rv = PR_FALSE;
  1.1217 +      continue;
  1.1218 +    }
  1.1219 +
  1.1220 +    if( (sizeof(back) != len) || (e->c != back) ) {
  1.1221 +      dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-4: ");
  1.1222 +      fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
  1.1223 +      rv = PR_FALSE;
  1.1224 +      continue;
  1.1225 +    }
  1.1226 +  }
  1.1227 +
  1.1228 +  return rv;
  1.1229 +}
  1.1230 +/* Round-trip each ucs2[] vector: UCS-2 -> UTF-8 (compared with e->utf8) -> back to UCS-2. */
  1.1231 +static PRBool  /* PR_TRUE when every vector passes; each failure is reported on stdout */
  1.1232 +test_ucs2_chars
  1.1233 +(
  1.1234 +  void
  1.1235 +)
  1.1236 +{
  1.1237 +  PRBool rv = PR_TRUE;
  1.1238 +  unsigned int i; /* unsigned: it is compared against a sizeof-derived count */
  1.1239 +
  1.1240 +  for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1.1241 +    struct ucs2 *e = &ucs2[i];
  1.1242 +    PRBool result;
  1.1243 +    unsigned char utf8[8];
  1.1244 +    unsigned int len = 0;
  1.1245 +    PRUint16 back = 0;
  1.1246 +
  1.1247 +    (void)memset(utf8, 0, sizeof(utf8));
  1.1248 +    /* PR_FALSE asks for the UCS-2 -> UTF-8 direction; len receives the UTF-8 byte count */
  1.1249 +    result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
  1.1250 +      (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
  1.1251 +
  1.1252 +    if( !result ) {
  1.1253 +      fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
  1.1254 +      rv = PR_FALSE;
  1.1255 +      continue;
  1.1256 +    }
  1.1257 +    /* the bound test must stay first: it is what makes the utf8[len] store below safe */
  1.1258 +    if( (len >= sizeof(utf8)) ||
  1.1259 +        (strlen(e->utf8) != len) ||
  1.1260 +        (utf8[len] = '\0', 0 != strcmp(e->utf8, (char *)utf8)) ) {
  1.1261 +      fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
  1.1262 +      dump_utf8("expected", (unsigned char *)e->utf8, ", ");
  1.1263 +      dump_utf8("received", utf8, "\n");
  1.1264 +      rv = PR_FALSE;
  1.1265 +      continue;
  1.1266 +    }
  1.1267 +    /* PR_TRUE asks for UTF-8 -> UCS-2; len is passed in as input size and overwritten with output size */
  1.1268 +    result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1.1269 +      utf8, len, (unsigned char *)&back, sizeof(back), &len);
  1.1270 +
  1.1271 +    if( !result ) {
  1.1272 +      dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
  1.1273 +      rv = PR_FALSE;
  1.1274 +      continue;
  1.1275 +    }
  1.1276 +
  1.1277 +    if( (sizeof(back) != len) || (e->c != back) ) {
  1.1278 +      dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2: ");
  1.1279 +      fprintf(stdout, "expected 0x%04.4x, received 0x%04.4x\n", e->c, back); /* 16-bit values: 4 hex digits */
  1.1280 +      rv = PR_FALSE;
  1.1281 +      continue;
  1.1282 +    }
  1.1283 +  }
  1.1284 +
  1.1285 +  return rv;
  1.1286 +}
  1.1287 +/* Check each utf16[] vector both ways through UTF-8: UTF-16 -> UTF-8 -> UCS-4, then UCS-4 -> UTF-8 -> UTF-16. */
  1.1288 +static PRBool  /* PR_TRUE when every vector passes; each failure is reported on stdout */
  1.1289 +test_utf16_chars
  1.1290 +(
  1.1291 +  void
  1.1292 +)
  1.1293 +{
  1.1294 +  PRBool rv = PR_TRUE;
  1.1295 +  unsigned int i; /* unsigned: it is compared against a sizeof-derived count */
  1.1296 +
  1.1297 +  for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
  1.1298 +    struct utf16 *e = &utf16[i];
  1.1299 +    PRBool result;
  1.1300 +    unsigned char utf8[8];
  1.1301 +    unsigned int len = 0, outlen = 0; /* len: UTF-8 byte count; outlen: size of the value converted back */
  1.1302 +    PRUint32 back32 = 0;
  1.1303 +    PRUint16 back[2];
  1.1304 +
  1.1305 +    (void)memset(utf8, 0, sizeof(utf8));
  1.1306 +    /* the UCS-2 converter is handed the whole surrogate pair (sizeof(e->w) bytes) */
  1.1307 +    result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
  1.1308 +      (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
  1.1309 +
  1.1310 +    if( !result ) {
  1.1311 +      fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n",
  1.1312 +              e->w[0], e->w[1]);
  1.1313 +      rv = PR_FALSE;
  1.1314 +      continue;
  1.1315 +    }
  1.1316 +    /* validate the UTF-8 length now, before it is used as an input size and as an index */
  1.1317 +    if( 4 != len ) {
  1.1318 +      fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
  1.1319 +              "unexpected len %u\n", e->w[0], e->w[1], len);
  1.1320 +      rv = PR_FALSE;
  1.1321 +      continue;
  1.1322 +    }
  1.1323 +
  1.1324 +    utf8[len] = '\0'; /* null-terminate for printing */
  1.1325 +
  1.1326 +    result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1.1327 +      utf8, len, (unsigned char *)&back32, sizeof(back32), &outlen);
  1.1328 +
  1.1329 +    if( !result ) {
  1.1330 +      dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
  1.1331 +      rv = PR_FALSE;
  1.1332 +      continue;
  1.1333 +    }
  1.1334 +
  1.1335 +    if( (sizeof(back32) != outlen) || (e->c != back32) ) {
  1.1336 +      fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ",
  1.1337 +              e->w[0], e->w[1]);
  1.1338 +      dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
  1.1339 +      if( sizeof(back32) != outlen ) {
  1.1340 +        fprintf(stdout, "len is %u\n", outlen);
  1.1341 +      } else {
  1.1342 +        fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
  1.1343 +      }
  1.1344 +      rv = PR_FALSE;
  1.1345 +      continue;
  1.1346 +    }
  1.1347 +    /* opposite direction: start again from the UCS-4 value of the same vector */
  1.1348 +    (void)memset(utf8, 0, sizeof(utf8));
  1.1349 +    back[0] = back[1] = 0;
  1.1350 +
  1.1351 +    result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
  1.1352 +      (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
  1.1353 +
  1.1354 +    if( !result ) {
  1.1355 +      fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n",
  1.1356 +              e->c);
  1.1357 +      rv = PR_FALSE;
  1.1358 +      continue;
  1.1359 +    }
  1.1360 +
  1.1361 +    if( 4 != len ) {
  1.1362 +      fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
  1.1363 +              "unexpected len %u\n", e->c, len);
  1.1364 +      rv = PR_FALSE;
  1.1365 +      continue;
  1.1366 +    }
  1.1367 +
  1.1368 +    utf8[len] = '\0'; /* null-terminate for printing */
  1.1369 +
  1.1370 +    result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1.1371 +      utf8, len, (unsigned char *)&back[0], sizeof(back), &outlen);
  1.1372 +
  1.1373 +    if( !result ) {
  1.1374 +      dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
  1.1375 +      rv = PR_FALSE;
  1.1376 +      continue;
  1.1377 +    }
  1.1378 +
  1.1379 +    if( (sizeof(back) != outlen) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
  1.1380 +      fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
  1.1381 +      dump_utf8("", utf8, "and then to UTF-16: ");
  1.1382 +      if( sizeof(back) != outlen ) {
  1.1383 +        fprintf(stdout, "len is %u\n", outlen);
  1.1384 +      } else {
  1.1385 +        fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4x\n",
  1.1386 +                e->w[0], e->w[1], back[0], back[1]);
  1.1387 +      }
  1.1388 +      rv = PR_FALSE;
  1.1389 +      continue;
  1.1390 +    }
  1.1391 +  }
  1.1392 +
  1.1393 +  return rv;
  1.1394 +}
  1.1395 +/* Feed each utf8_bad[] entry to the UCS-2 and then the UCS-4 UTF-8 decoder; a successful decode is a failure. */
  1.1396 +static PRBool  /* PR_FALSE if any entry was accepted by a decoder, PR_TRUE otherwise */
  1.1397 +test_utf8_bad_chars
  1.1398 +(
  1.1399 +  void
  1.1400 +)
  1.1401 +{
  1.1402 +  PRBool all_rejected = PR_TRUE;
  1.1403 +  int n;
  1.1404 +
  1.1405 +  for( n = 0; n < sizeof(utf8_bad)/sizeof(utf8_bad[0]); n++ ) {
  1.1406 +    char *bad = utf8_bad[n];
  1.1407 +    unsigned char out[30];   /* scratch output; its contents are never inspected */
  1.1408 +    unsigned int outlen = 0;
  1.1409 +    PRBool accepted;
  1.1410 +
  1.1411 +    /* UCS-2 decoder first */
  1.1412 +    accepted = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1.1413 +      (unsigned char *)bad, strlen(bad), out, sizeof(out), &outlen);
  1.1414 +    if( accepted ) {
  1.1415 +      dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", bad, "\n");
  1.1416 +      all_rejected = PR_FALSE;
  1.1417 +      continue;
  1.1418 +    }
  1.1419 +
  1.1420 +    /* the UCS-4 decoder is tried only once the UCS-2 decoder has rejected the entry */
  1.1421 +    accepted = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1.1422 +      (unsigned char *)bad, strlen(bad), out, sizeof(out), &outlen);
  1.1423 +    if( accepted ) {
  1.1424 +      dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", bad, "\n");
  1.1425 +      all_rejected = PR_FALSE;
  1.1426 +    }
  1.1427 +  }
  1.1428 +
  1.1429 +  return all_rejected;
  1.1430 +}
  1.1431 +/* Convert every ucs2[] vector whose value fits in one byte from ISO-8859-1 to UTF-8 and compare with the vector. */
  1.1432 +static PRBool  /* PR_TRUE when every applicable vector passes; failures go to stdout */
  1.1433 +test_iso88591_chars
  1.1434 +(
  1.1435 +  void
  1.1436 +)
  1.1437 +{
  1.1438 +  PRBool ok = PR_TRUE;
  1.1439 +  int idx;
  1.1440 +
  1.1441 +  for( idx = 0; idx < sizeof(ucs2)/sizeof(ucs2[0]); idx++ ) {
  1.1442 +    struct ucs2 *vec = &ucs2[idx];
  1.1443 +    unsigned char latin1;
  1.1444 +    unsigned char out[3];
  1.1445 +    unsigned int outlen = 0;
  1.1446 +    PRBool mismatch;
  1.1447 +
  1.1448 +    /* vec->c goes through ntohs() before use; values above 0xFF have no single-byte form */
  1.1449 +    if( ntohs(vec->c) > 0xFF ) continue;
  1.1450 +
  1.1451 +    (void)memset(out, 0, sizeof(out));
  1.1452 +    latin1 = ntohs(vec->c);
  1.1453 +    if( !sec_port_iso88591_utf8_conversion_function(&latin1,
  1.1454 +          1, out, sizeof(out), &outlen) ) {
  1.1455 +      fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", latin1);
  1.1456 +      ok = PR_FALSE;
  1.1457 +      continue;
  1.1458 +    }
  1.1459 +
  1.1460 +    /* evaluated in order: bound, then length, then (after terminating out) the bytes */
  1.1461 +    mismatch = (outlen >= sizeof(out)) || (strlen(vec->utf8) != outlen);
  1.1462 +    if( !mismatch ) {
  1.1463 +      out[outlen] = '\0';
  1.1464 +      mismatch = (0 != strcmp(vec->utf8, out));
  1.1465 +    }
  1.1466 +    if( mismatch ) {
  1.1467 +      fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", latin1);
  1.1468 +      dump_utf8("expected", vec->utf8, ", ");
  1.1469 +      dump_utf8("received", out, "\n");
  1.1470 +      ok = PR_FALSE;
  1.1471 +    }
  1.1472 +  }
  1.1473 +
  1.1474 +  return ok;
  1.1475 +}
  1.1476 +/* Check that a zero character converts to the single byte 0x00 and back, for both UCS-4 and UCS-2. */
  1.1477 +static PRBool  /* PR_TRUE when all four conversions behave; all four are always attempted */
  1.1478 +test_zeroes
  1.1479 +(
  1.1480 +  void
  1.1481 +)
  1.1482 +{
  1.1483 +  PRBool ok = PR_TRUE;
  1.1484 +  PRBool converted;
  1.1485 +  PRUint32 zero32 = 0;
  1.1486 +  PRUint16 zero16 = 0;
  1.1487 +  PRUint32 back32 = 1;   /* starts non-zero; the check below requires it to become 0 */
  1.1488 +  PRUint16 back16 = 1;   /* likewise for the UCS-2 round trip */
  1.1489 +  unsigned char buf[8];
  1.1490 +  unsigned int n = 0;
  1.1491 +
  1.1492 +  /* UCS-4 zero -> UTF-8; buf is filled with 1s first and byte 0 must come back as 0x00 */
  1.1493 +  (void)memset(buf, 1, sizeof(buf));
  1.1494 +
  1.1495 +  converted = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
  1.1496 +    (unsigned char *)&zero32, sizeof(zero32), buf, sizeof(buf), &n);
  1.1497 +  if( !converted ) {
  1.1498 +    fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
  1.1499 +    ok = PR_FALSE;
  1.1500 +  } else if( n != 1 ) {
  1.1501 +    fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", n);
  1.1502 +    ok = PR_FALSE;
  1.1503 +  } else if( buf[0] != '\0' ) {
  1.1504 +    fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
  1.1505 +            "received %02.2x\n", (unsigned int)buf[0]);
  1.1506 +    ok = PR_FALSE;
  1.1507 +  }
  1.1508 +
  1.1509 +  /* UTF-8 -> UCS-4; the input is the one NUL byte of the empty string literal */
  1.1510 +  converted = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1.1511 +    "", 1, (unsigned char *)&back32, sizeof(back32), &n);
  1.1512 +  if( !converted ) {
  1.1513 +    fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
  1.1514 +    ok = PR_FALSE;
  1.1515 +  } else if( n != 4 ) {
  1.1516 +    fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", n);
  1.1517 +    ok = PR_FALSE;
  1.1518 +  } else if( back32 != 0 ) {
  1.1519 +    fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
  1.1520 +            "expected 0x00000000, received 0x%08.8x\n", back32);
  1.1521 +    ok = PR_FALSE;
  1.1522 +  }
  1.1523 +
  1.1524 +  /* UCS-2 zero -> UTF-8, with buf refilled with 1s */
  1.1525 +  (void)memset(buf, 1, sizeof(buf));
  1.1526 +
  1.1527 +  converted = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
  1.1528 +    (unsigned char *)&zero16, sizeof(zero16), buf, sizeof(buf), &n);
  1.1529 +  if( !converted ) {
  1.1530 +    fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
  1.1531 +    ok = PR_FALSE;
  1.1532 +  } else if( n != 1 ) {
  1.1533 +    fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", n);
  1.1534 +    ok = PR_FALSE;
  1.1535 +  } else if( buf[0] != '\0' ) {
  1.1536 +    fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
  1.1537 +            "received %02.2x\n", (unsigned int)buf[0]);
  1.1538 +    ok = PR_FALSE;
  1.1539 +  }
  1.1540 +
  1.1541 +  /* UTF-8 -> UCS-2, again from the one NUL byte of the empty string literal */
  1.1542 +  converted = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1.1543 +    "", 1, (unsigned char *)&back16, sizeof(back16), &n);
  1.1544 +  if( !converted ) {
  1.1545 +    fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
  1.1546 +    ok = PR_FALSE;
  1.1547 +  } else if( n != 2 ) {
  1.1548 +    fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", n);
  1.1549 +    ok = PR_FALSE;
  1.1550 +  } else if( back16 != 0 ) {
  1.1551 +    fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
  1.1552 +            "expected 0x0000, received 0x%04.4x\n", back16);
  1.1553 +    ok = PR_FALSE;
  1.1554 +  }
  1.1555 +
  1.1556 +  return ok;
  1.1557 +}
  1.1558 +
  1.1559 +static PRBool
  1.1560 +test_multichars
  1.1561 +(
  1.1562 +  void
  1.1563 +)
  1.1564 +{
  1.1565 +  int i;
  1.1566 +  unsigned int len, lenout;
  1.1567 +  PRUint32 *ucs4s;
  1.1568 +  char *ucs4_utf8;
  1.1569 +  PRUint16 *ucs2s;
  1.1570 +  char *ucs2_utf8;
  1.1571 +  void *tmp;
  1.1572 +  PRBool result;
  1.1573 +
  1.1574 +  ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
  1.1575 +  ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
  1.1576 +
  1.1577 +  if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
  1.1578 +    fprintf(stderr, "out of memory\n");
  1.1579 +    exit(1);
  1.1580 +  }
  1.1581 +
  1.1582 +  len = 0;
  1.1583 +  for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1.1584 +    ucs4s[i] = ucs4[i].c;
  1.1585 +    len += strlen(ucs4[i].utf8);
  1.1586 +  }
  1.1587 +
  1.1588 +  ucs4_utf8 = (char *)malloc(len);
  1.1589 +
  1.1590 +  len = 0;
  1.1591 +  for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1.1592 +    ucs2s[i] = ucs2[i].c;
  1.1593 +    len += strlen(ucs2[i].utf8);
  1.1594 +  }
  1.1595 +
  1.1596 +  ucs2_utf8 = (char *)malloc(len);
  1.1597 +
  1.1598 +  if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
  1.1599 +    fprintf(stderr, "out of memory\n");
  1.1600 +    exit(1);
  1.1601 +  }
  1.1602 +
  1.1603 +  *ucs4_utf8 = '\0';
  1.1604 +  for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1.1605 +    strcat(ucs4_utf8, ucs4[i].utf8);
  1.1606 +  }
  1.1607 +
  1.1608 +  *ucs2_utf8 = '\0';
  1.1609 +  for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1.1610 +    strcat(ucs2_utf8, ucs2[i].utf8);
  1.1611 +  }
  1.1612 +
  1.1613 +  /* UTF-8 -> UCS-4 */
  1.1614 +  len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
  1.1615 +  tmp = calloc(len, 1);
  1.1616 +  if( (void *)NULL == tmp ) {
  1.1617 +    fprintf(stderr, "out of memory\n");
  1.1618 +    exit(1);
  1.1619 +  }
  1.1620 +
  1.1621 +  result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
  1.1622 +    ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
  1.1623 +  if( !result ) {
  1.1624 +    fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
  1.1625 +    goto done;
  1.1626 +  }
  1.1627 +
  1.1628 +  if( lenout != len ) {
  1.1629 +    fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
  1.1630 +    goto loser;
  1.1631 +  }
  1.1632 +
  1.1633 +  if( 0 != memcmp(ucs4s, tmp, len) ) {
  1.1634 +    fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
  1.1635 +    goto loser;
  1.1636 +  }
  1.1637 +
  1.1638 +  free(tmp); tmp = (void *)NULL;
  1.1639 +
  1.1640 +  /* UCS-4 -> UTF-8 */
  1.1641 +  len = strlen(ucs4_utf8);
  1.1642 +  tmp = calloc(len, 1);
  1.1643 +  if( (void *)NULL == tmp ) {
  1.1644 +    fprintf(stderr, "out of memory\n");
  1.1645 +    exit(1);
  1.1646 +  }
  1.1647 +
  1.1648 +  result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
  1.1649 +    (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), 
  1.1650 +    tmp, len, &lenout);
  1.1651 +  if( !result ) {
  1.1652 +    fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
  1.1653 +    goto done;
  1.1654 +  }
  1.1655 +
  1.1656 +  if( lenout != len ) {
  1.1657 +    fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
  1.1658 +    goto loser;
  1.1659 +  }
  1.1660 +
  1.1661 +  if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
  1.1662 +    fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
  1.1663 +    goto loser;
  1.1664 +  }
  1.1665 +
  1.1666 +  free(tmp); tmp = (void *)NULL;
  1.1667 +
  1.1668 +  /* UTF-8 -> UCS-2 */
  1.1669 +  len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
  1.1670 +  tmp = calloc(len, 1);
  1.1671 +  if( (void *)NULL == tmp ) {
  1.1672 +    fprintf(stderr, "out of memory\n");
  1.1673 +    exit(1);
  1.1674 +  }
  1.1675 +
  1.1676 +  result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
  1.1677 +    ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
  1.1678 +  if( !result ) {
  1.1679 +    fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
  1.1680 +    goto done;
  1.1681 +  }
  1.1682 +
  1.1683 +  if( lenout != len ) {
  1.1684 +    fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
  1.1685 +    goto loser;
  1.1686 +  }
  1.1687 +
  1.1688 +  if( 0 != memcmp(ucs2s, tmp, len) ) {
  1.1689 +    fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
  1.1690 +    goto loser;
  1.1691 +  }
  1.1692 +
  1.1693 +  free(tmp); tmp = (void *)NULL;
  1.1694 +
  1.1695 +  /* UCS-2 -> UTF-8 */
  1.1696 +  len = strlen(ucs2_utf8);
  1.1697 +  tmp = calloc(len, 1);
  1.1698 +  if( (void *)NULL == tmp ) {
  1.1699 +    fprintf(stderr, "out of memory\n");
  1.1700 +    exit(1);
  1.1701 +  }
  1.1702 +
  1.1703 +  result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
  1.1704 +    (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), 
  1.1705 +    tmp, len, &lenout);
  1.1706 +  if( !result ) {
  1.1707 +    fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
  1.1708 +    goto done;
  1.1709 +  }
  1.1710 +
  1.1711 +  if( lenout != len ) {
  1.1712 +    fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
  1.1713 +    goto loser;
  1.1714 +  }
  1.1715 +
  1.1716 +  if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
  1.1717 +    fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
  1.1718 +    goto loser;
  1.1719 +  }
  1.1720 +
  1.1721 +  /* implement UTF16 */
  1.1722 +
  1.1723 +  result = PR_TRUE;
  1.1724 +  goto done;
  1.1725 +
  1.1726 + loser:
  1.1727 +  result = PR_FALSE;
  1.1728 + done:
  1.1729 +  free(ucs4s);
  1.1730 +  free(ucs4_utf8);
  1.1731 +  free(ucs2s);
  1.1732 +  free(ucs2_utf8);
  1.1733 +  if( (void *)NULL != tmp ) free(tmp);
  1.1734 +  return result;
  1.1735 +}
  1.1736 +
  1.1737 +void
  1.1738 +byte_order
  1.1739 +(
  1.1740 +  void
  1.1741 +)
  1.1742 +{
  1.1743 +  /*
  1.1744 +   * The implementation (now) expects the 16- and 32-bit characters
  1.1745 +   * to be in network byte order, not host byte order.  Therefore I
  1.1746 +   * have to byteswap all those test vectors above.  hton[ls] may be
  1.1747 +   * functions, so I have to do this dynamically.  If you want to 
  1.1748 +   * use this code to do host byte order conversions, just remove
  1.1749 +   * the call in main() to this function.
  1.1750 +   */
  1.1751 +
  1.1752 +  int i;
  1.1753 +
  1.1754 +  for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
  1.1755 +    struct ucs4 *e = &ucs4[i];
  1.1756 +    e->c = htonl(e->c);
  1.1757 +  }
  1.1758 +
  1.1759 +  for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
  1.1760 +    struct ucs2 *e = &ucs2[i];
  1.1761 +    e->c = htons(e->c);
  1.1762 +  }
  1.1763 +
  1.1764 +  for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
  1.1765 +    struct utf16 *e = &utf16[i];
  1.1766 +    e->c = htonl(e->c);
  1.1767 +    e->w[0] = htons(e->w[0]);
  1.1768 +    e->w[1] = htons(e->w[1]);
  1.1769 +  }
  1.1770 +
  1.1771 +  return;
  1.1772 +}
  1.1773 +
  1.1774 +int
  1.1775 +main
  1.1776 +(
  1.1777 +  int argc,
  1.1778 +  char *argv[]
  1.1779 +)
  1.1780 +{
  1.1781 +  byte_order();
  1.1782 +
  1.1783 +  if( test_ucs4_chars() &&
  1.1784 +      test_ucs2_chars() &&
  1.1785 +      test_utf16_chars() &&
  1.1786 +      test_utf8_bad_chars() &&
  1.1787 +      test_iso88591_chars() &&
  1.1788 +      test_zeroes() &&
  1.1789 +      test_multichars() &&
  1.1790 +      PR_TRUE ) {
  1.1791 +    fprintf(stderr, "PASS\n");
  1.1792 +    return 1;
  1.1793 +  } else {
  1.1794 +    fprintf(stderr, "FAIL\n");
  1.1795 +    return 0;
  1.1796 +  }
  1.1797 +}
  1.1798 +
  1.1799 +#endif /* TEST_UTF8 */

mercurial