security/nss/lib/util/utf8.c

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 2 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 4
michael@0 5 #include "seccomon.h"
michael@0 6 #include "secport.h"
michael@0 7
michael@0 8 #ifdef TEST_UTF8
michael@0 9 #include <assert.h>
michael@0 10 #undef PORT_Assert
michael@0 11 #define PORT_Assert assert
michael@0 12 #endif
michael@0 13
michael@0 14 /*
michael@0 15 * From RFC 2044:
michael@0 16 *
michael@0 17 * UCS-4 range (hex.) UTF-8 octet sequence (binary)
michael@0 18 * 0000 0000-0000 007F 0xxxxxxx
michael@0 19 * 0000 0080-0000 07FF 110xxxxx 10xxxxxx
michael@0 20 * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
michael@0 21 * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
michael@0 22 * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
michael@0 23 * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx
michael@0 24 */
michael@0 25
michael@0 26 /*
michael@0 27 * From http://www.imc.org/draft-hoffman-utf16
michael@0 28 *
michael@0 29 * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000
michael@0 30 *
michael@0 31 * U' = yyyyyyyyyyxxxxxxxxxx
michael@0 32 * W1 = 110110yyyyyyyyyy
michael@0 33 * W2 = 110111xxxxxxxxxx
michael@0 34 */
michael@0 35
michael@0 36 /*
michael@0 37 * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit
michael@0 38 * character values. If you wish to use this code for working with
michael@0 39 * host byte order values, define the following:
michael@0 40 *
michael@0 41 * #if IS_BIG_ENDIAN
michael@0 42 * #define L_0 0
michael@0 43 * #define L_1 1
michael@0 44 * #define L_2 2
michael@0 45 * #define L_3 3
michael@0 46 * #define H_0 0
michael@0 47 * #define H_1 1
michael@0 48 * #else / * not everyone has elif * /
michael@0 49 * #if IS_LITTLE_ENDIAN
michael@0 50 * #define L_0 3
michael@0 51 * #define L_1 2
michael@0 52 * #define L_2 1
michael@0 53 * #define L_3 0
michael@0 54 * #define H_0 1
michael@0 55 * #define H_1 0
michael@0 56 * #else
michael@0 57 * #error "PDP and NUXI support deferred"
michael@0 58 * #endif / * IS_LITTLE_ENDIAN * /
michael@0 59 * #endif / * IS_BIG_ENDIAN * /
michael@0 60 */
michael@0 61
michael@0 62 #define L_0 0 /* offset of the most significant byte of a 4-byte UCS-4 value */
michael@0 63 #define L_1 1 /* offset of the second most significant UCS-4 byte */
michael@0 64 #define L_2 2 /* offset of the third most significant UCS-4 byte */
michael@0 65 #define L_3 3 /* offset of the least significant UCS-4 byte */
michael@0 66 #define H_0 0 /* offset of the high byte of a 2-byte (UCS-2 / UTF-16) code unit */
michael@0 67 #define H_1 1 /* offset of the low byte of a 2-byte (UCS-2 / UTF-16) code unit */
michael@0 68
michael@0 69 #define BAD_UTF8 ((PRUint32)-1) /* failure sentinel returned by sec_port_read_utf8; larger than any code point it accepts (max 0x10FFFF) */
michael@0 70
michael@0 71 /*
michael@0 72 * Parse a single UTF-8 character per the spec. in section 3.9 (D36)
michael@0 73 * of Unicode 4.0.0.
michael@0 74 *
michael@0 75 * Parameters:
michael@0 76 * index - Points to the byte offset in inBuf of character to read. On success,
michael@0 77 * updated to the offset of the following character.
michael@0 78 * inBuf - Input buffer, UTF-8 encoded
michael@0 79 * inbufLen - Length of input buffer, in bytes.
michael@0 80 *
michael@0 81 * Returns:
michael@0 82 * Success - The UCS4 encoded character
michael@0 83 * Failure - BAD_UTF8
michael@0 84 */
michael@0 85 static PRUint32
michael@0 86 sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen)
michael@0 87 {
michael@0 88 PRUint32 result;
michael@0 89 unsigned int i = *index;
michael@0 90 int bytes_left;
michael@0 91 PRUint32 min_value;
michael@0 92
michael@0 93 PORT_Assert(i < inBufLen);
michael@0 94
michael@0 95 if ( (inBuf[i] & 0x80) == 0x00 ) {
michael@0 96 result = inBuf[i++];
michael@0 97 bytes_left = 0;
michael@0 98 min_value = 0;
michael@0 99 } else if ( (inBuf[i] & 0xE0) == 0xC0 ) {
michael@0 100 result = inBuf[i++] & 0x1F;
michael@0 101 bytes_left = 1;
michael@0 102 min_value = 0x80;
michael@0 103 } else if ( (inBuf[i] & 0xF0) == 0xE0) {
michael@0 104 result = inBuf[i++] & 0x0F;
michael@0 105 bytes_left = 2;
michael@0 106 min_value = 0x800;
michael@0 107 } else if ( (inBuf[i] & 0xF8) == 0xF0) {
michael@0 108 result = inBuf[i++] & 0x07;
michael@0 109 bytes_left = 3;
michael@0 110 min_value = 0x10000;
michael@0 111 } else {
michael@0 112 return BAD_UTF8;
michael@0 113 }
michael@0 114
michael@0 115 while (bytes_left--) {
michael@0 116 if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8;
michael@0 117 result = (result << 6) | (inBuf[i++] & 0x3F);
michael@0 118 }
michael@0 119
michael@0 120 /* Check for overlong sequences, surrogates, and outside unicode range */
michael@0 121 if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) {
michael@0 122 return BAD_UTF8;
michael@0 123 }
michael@0 124
michael@0 125 *index = i;
michael@0 126 return result;
michael@0 127 }
michael@0 128
michael@0 129 PRBool
michael@0 130 sec_port_ucs4_utf8_conversion_function
michael@0 131 (
michael@0 132 PRBool toUnicode,
michael@0 133 unsigned char *inBuf,
michael@0 134 unsigned int inBufLen,
michael@0 135 unsigned char *outBuf,
michael@0 136 unsigned int maxOutBufLen,
michael@0 137 unsigned int *outBufLen
michael@0 138 )
michael@0 139 {
michael@0 140 PORT_Assert((unsigned int *)NULL != outBufLen);
michael@0 141
michael@0 142 if( toUnicode ) {
michael@0 143 unsigned int i, len = 0;
michael@0 144
michael@0 145 for( i = 0; i < inBufLen; ) {
michael@0 146 if( (inBuf[i] & 0x80) == 0x00 ) i += 1;
michael@0 147 else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2;
michael@0 148 else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3;
michael@0 149 else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4;
michael@0 150 else return PR_FALSE;
michael@0 151
michael@0 152 len += 4;
michael@0 153 }
michael@0 154
michael@0 155 if( len > maxOutBufLen ) {
michael@0 156 *outBufLen = len;
michael@0 157 return PR_FALSE;
michael@0 158 }
michael@0 159
michael@0 160 len = 0;
michael@0 161
michael@0 162 for( i = 0; i < inBufLen; ) {
michael@0 163 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
michael@0 164
michael@0 165 if (ucs4 == BAD_UTF8) return PR_FALSE;
michael@0 166
michael@0 167 outBuf[len+L_0] = 0x00;
michael@0 168 outBuf[len+L_1] = (unsigned char)(ucs4 >> 16);
michael@0 169 outBuf[len+L_2] = (unsigned char)(ucs4 >> 8);
michael@0 170 outBuf[len+L_3] = (unsigned char)ucs4;
michael@0 171
michael@0 172 len += 4;
michael@0 173 }
michael@0 174
michael@0 175 *outBufLen = len;
michael@0 176 return PR_TRUE;
michael@0 177 } else {
michael@0 178 unsigned int i, len = 0;
michael@0 179 PORT_Assert((inBufLen % 4) == 0);
michael@0 180 if ((inBufLen % 4) != 0) {
michael@0 181 *outBufLen = 0;
michael@0 182 return PR_FALSE;
michael@0 183 }
michael@0 184
michael@0 185 for( i = 0; i < inBufLen; i += 4 ) {
michael@0 186 if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) {
michael@0 187 *outBufLen = 0;
michael@0 188 return PR_FALSE;
michael@0 189 } else if( inBuf[i+L_1] >= 0x01 ) len += 4;
michael@0 190 else if( inBuf[i+L_2] >= 0x08 ) len += 3;
michael@0 191 else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2;
michael@0 192 else len += 1;
michael@0 193 }
michael@0 194
michael@0 195 if( len > maxOutBufLen ) {
michael@0 196 *outBufLen = len;
michael@0 197 return PR_FALSE;
michael@0 198 }
michael@0 199
michael@0 200 len = 0;
michael@0 201
michael@0 202 for( i = 0; i < inBufLen; i += 4 ) {
michael@0 203 if( inBuf[i+L_1] >= 0x01 ) {
michael@0 204 /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
michael@0 205 /* 00000000 000abcde fghijklm nopqrstu ->
michael@0 206 11110abc 10defghi 10jklmno 10pqrstu */
michael@0 207
michael@0 208 outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2);
michael@0 209 outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4)
michael@0 210 | ((inBuf[i+L_2] & 0xF0) >> 4);
michael@0 211 outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
michael@0 212 | ((inBuf[i+L_3] & 0xC0) >> 6);
michael@0 213 outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
michael@0 214
michael@0 215 len += 4;
michael@0 216 } else if( inBuf[i+L_2] >= 0x08 ) {
michael@0 217 /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
michael@0 218 /* 00000000 00000000 abcdefgh ijklmnop ->
michael@0 219 1110abcd 10efghij 10klmnop */
michael@0 220
michael@0 221 outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4);
michael@0 222 outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2)
michael@0 223 | ((inBuf[i+L_3] & 0xC0) >> 6);
michael@0 224 outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
michael@0 225
michael@0 226 len += 3;
michael@0 227 } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) {
michael@0 228 /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
michael@0 229 /* 00000000 00000000 00000abc defghijk ->
michael@0 230 110abcde 10fghijk */
michael@0 231
michael@0 232 outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2)
michael@0 233 | ((inBuf[i+L_3] & 0xC0) >> 6);
michael@0 234 outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0);
michael@0 235
michael@0 236 len += 2;
michael@0 237 } else {
michael@0 238 /* 0000 0000-0000 007F -> 0xxxxxx */
michael@0 239 /* 00000000 00000000 00000000 0abcdefg ->
michael@0 240 0abcdefg */
michael@0 241
michael@0 242 outBuf[len+0] = (inBuf[i+L_3] & 0x7F);
michael@0 243
michael@0 244 len += 1;
michael@0 245 }
michael@0 246 }
michael@0 247
michael@0 248 *outBufLen = len;
michael@0 249 return PR_TRUE;
michael@0 250 }
michael@0 251 }
michael@0 252
michael@0 253 PRBool
michael@0 254 sec_port_ucs2_utf8_conversion_function
michael@0 255 (
michael@0 256 PRBool toUnicode,
michael@0 257 unsigned char *inBuf,
michael@0 258 unsigned int inBufLen,
michael@0 259 unsigned char *outBuf,
michael@0 260 unsigned int maxOutBufLen,
michael@0 261 unsigned int *outBufLen
michael@0 262 )
michael@0 263 {
michael@0 264 PORT_Assert((unsigned int *)NULL != outBufLen);
michael@0 265
michael@0 266 if( toUnicode ) {
michael@0 267 unsigned int i, len = 0;
michael@0 268
michael@0 269 for( i = 0; i < inBufLen; ) {
michael@0 270 if( (inBuf[i] & 0x80) == 0x00 ) {
michael@0 271 i += 1;
michael@0 272 len += 2;
michael@0 273 } else if( (inBuf[i] & 0xE0) == 0xC0 ) {
michael@0 274 i += 2;
michael@0 275 len += 2;
michael@0 276 } else if( (inBuf[i] & 0xF0) == 0xE0 ) {
michael@0 277 i += 3;
michael@0 278 len += 2;
michael@0 279 } else if( (inBuf[i] & 0xF8) == 0xF0 ) {
michael@0 280 i += 4;
michael@0 281 len += 4;
michael@0 282 } else return PR_FALSE;
michael@0 283 }
michael@0 284
michael@0 285 if( len > maxOutBufLen ) {
michael@0 286 *outBufLen = len;
michael@0 287 return PR_FALSE;
michael@0 288 }
michael@0 289
michael@0 290 len = 0;
michael@0 291
michael@0 292 for( i = 0; i < inBufLen; ) {
michael@0 293 PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen);
michael@0 294
michael@0 295 if (ucs4 == BAD_UTF8) return PR_FALSE;
michael@0 296
michael@0 297 if( ucs4 < 0x10000) {
michael@0 298 outBuf[len+H_0] = (unsigned char)(ucs4 >> 8);
michael@0 299 outBuf[len+H_1] = (unsigned char)ucs4;
michael@0 300 len += 2;
michael@0 301 } else {
michael@0 302 ucs4 -= 0x10000;
michael@0 303 outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3));
michael@0 304 outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10);
michael@0 305 outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3));
michael@0 306 outBuf[len+2+H_1] = (unsigned char)ucs4;
michael@0 307 len += 4;
michael@0 308 }
michael@0 309 }
michael@0 310
michael@0 311 *outBufLen = len;
michael@0 312 return PR_TRUE;
michael@0 313 } else {
michael@0 314 unsigned int i, len = 0;
michael@0 315 PORT_Assert((inBufLen % 2) == 0);
michael@0 316 if ((inBufLen % 2) != 0) {
michael@0 317 *outBufLen = 0;
michael@0 318 return PR_FALSE;
michael@0 319 }
michael@0 320
michael@0 321 for( i = 0; i < inBufLen; i += 2 ) {
michael@0 322 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1;
michael@0 323 else if( inBuf[i+H_0] < 0x08 ) len += 2;
michael@0 324 else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) {
michael@0 325 if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) {
michael@0 326 i += 2;
michael@0 327 len += 4;
michael@0 328 } else {
michael@0 329 return PR_FALSE;
michael@0 330 }
michael@0 331 }
michael@0 332 else len += 3;
michael@0 333 }
michael@0 334
michael@0 335 if( len > maxOutBufLen ) {
michael@0 336 *outBufLen = len;
michael@0 337 return PR_FALSE;
michael@0 338 }
michael@0 339
michael@0 340 len = 0;
michael@0 341
michael@0 342 for( i = 0; i < inBufLen; i += 2 ) {
michael@0 343 if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) {
michael@0 344 /* 0000-007F -> 0xxxxxx */
michael@0 345 /* 00000000 0abcdefg -> 0abcdefg */
michael@0 346
michael@0 347 outBuf[len] = inBuf[i+H_1] & 0x7F;
michael@0 348
michael@0 349 len += 1;
michael@0 350 } else if( inBuf[i+H_0] < 0x08 ) {
michael@0 351 /* 0080-07FF -> 110xxxxx 10xxxxxx */
michael@0 352 /* 00000abc defghijk -> 110abcde 10fghijk */
michael@0 353
michael@0 354 outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2)
michael@0 355 | ((inBuf[i+H_1] & 0xC0) >> 6);
michael@0 356 outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
michael@0 357
michael@0 358 len += 2;
michael@0 359 } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) {
michael@0 360 int abcde, BCDE;
michael@0 361
michael@0 362 PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2));
michael@0 363
michael@0 364 /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
michael@0 365 /* 110110BC DEfghijk 110111lm nopqrstu ->
michael@0 366 { Let abcde = BCDE + 1 }
michael@0 367 11110abc 10defghi 10jklmno 10pqrstu */
michael@0 368
michael@0 369 BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6);
michael@0 370 abcde = BCDE + 1;
michael@0 371
michael@0 372 outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2);
michael@0 373 outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4)
michael@0 374 | ((inBuf[i+0+H_1] & 0x3C) >> 2);
michael@0 375 outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4)
michael@0 376 | ((inBuf[i+2+H_0] & 0x03) << 2)
michael@0 377 | ((inBuf[i+2+H_1] & 0xC0) >> 6);
michael@0 378 outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0);
michael@0 379
michael@0 380 i += 2;
michael@0 381 len += 4;
michael@0 382 } else {
michael@0 383 /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
michael@0 384 /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */
michael@0 385
michael@0 386 outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4);
michael@0 387 outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2)
michael@0 388 | ((inBuf[i+H_1] & 0xC0) >> 6);
michael@0 389 outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0);
michael@0 390
michael@0 391 len += 3;
michael@0 392 }
michael@0 393 }
michael@0 394
michael@0 395 *outBufLen = len;
michael@0 396 return PR_TRUE;
michael@0 397 }
michael@0 398 }
michael@0 399
michael@0 400 PRBool
michael@0 401 sec_port_iso88591_utf8_conversion_function
michael@0 402 (
michael@0 403 const unsigned char *inBuf,
michael@0 404 unsigned int inBufLen,
michael@0 405 unsigned char *outBuf,
michael@0 406 unsigned int maxOutBufLen,
michael@0 407 unsigned int *outBufLen
michael@0 408 )
michael@0 409 {
michael@0 410 unsigned int i, len = 0;
michael@0 411
michael@0 412 PORT_Assert((unsigned int *)NULL != outBufLen);
michael@0 413
michael@0 414 for( i = 0; i < inBufLen; i++) {
michael@0 415 if( (inBuf[i] & 0x80) == 0x00 ) len += 1;
michael@0 416 else len += 2;
michael@0 417 }
michael@0 418
michael@0 419 if( len > maxOutBufLen ) {
michael@0 420 *outBufLen = len;
michael@0 421 return PR_FALSE;
michael@0 422 }
michael@0 423
michael@0 424 len = 0;
michael@0 425
michael@0 426 for( i = 0; i < inBufLen; i++) {
michael@0 427 if( (inBuf[i] & 0x80) == 0x00 ) {
michael@0 428 /* 00-7F -> 0xxxxxxx */
michael@0 429 /* 0abcdefg -> 0abcdefg */
michael@0 430
michael@0 431 outBuf[len] = inBuf[i];
michael@0 432 len += 1;
michael@0 433 } else {
michael@0 434 /* 80-FF <- 110xxxxx 10xxxxxx */
michael@0 435 /* 00000000 abcdefgh -> 110000ab 10cdefgh */
michael@0 436
michael@0 437 outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6);
michael@0 438 outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0);
michael@0 439
michael@0 440 len += 2;
michael@0 441 }
michael@0 442 }
michael@0 443
michael@0 444 *outBufLen = len;
michael@0 445 return PR_TRUE;
michael@0 446 }
michael@0 447
michael@0 448 #ifdef TEST_UTF8
michael@0 449
michael@0 450 #include <stdio.h>
michael@0 451 #include <string.h>
michael@0 452 #include <stdlib.h>
michael@0 453 #include <netinet/in.h> /* for htonl and htons */
michael@0 454
michael@0 455 /*
michael@0 456 * UCS-4 vectors
michael@0 457 */
michael@0 458
michael@0 459 struct ucs4 { /* one UCS-4 <-> UTF-8 test vector */
michael@0 460 PRUint32 c; /* the character as a 32-bit code point value */
michael@0 461 char *utf8; /* the same character as a NUL-terminated UTF-8 byte string */
michael@0 462 };
michael@0 463
michael@0 464 /*
michael@0 465 * UCS-2 vectors
michael@0 466 */
michael@0 467
michael@0 468 struct ucs2 { /* one UCS-2 <-> UTF-8 test vector */
michael@0 469 PRUint16 c; /* the character as a 16-bit value */
michael@0 470 char *utf8; /* the same character as a NUL-terminated UTF-8 byte string */
michael@0 471 };
michael@0 472
michael@0 473 /*
michael@0 474 * UTF-16 vectors
michael@0 475 */
michael@0 476
michael@0 477 struct utf16 { /* one code point <-> UTF-16 test vector */
michael@0 478 PRUint32 c; /* the character as a 32-bit code point value */
michael@0 479 PRUint16 w[2]; /* the two 16-bit UTF-16 words; the entries visible below hold a 0xD8xx/0xDCxx surrogate pair */
michael@0 480 };
michael@0 481
michael@0 482
michael@0 483 /*
michael@0 484 * UCS-4 vectors: each entry pairs a code point (0x00000001 through 0x0010FFFF) with its UTF-8 byte sequence (1 to 4 bytes).
michael@0 485 */
michael@0 486
michael@0 487 struct ucs4 ucs4[] = {
michael@0 488 { 0x00000001, "\x01" },
michael@0 489 { 0x00000002, "\x02" },
michael@0 490 { 0x00000003, "\x03" },
michael@0 491 { 0x00000004, "\x04" },
michael@0 492 { 0x00000007, "\x07" },
michael@0 493 { 0x00000008, "\x08" },
michael@0 494 { 0x0000000F, "\x0F" },
michael@0 495 { 0x00000010, "\x10" },
michael@0 496 { 0x0000001F, "\x1F" },
michael@0 497 { 0x00000020, "\x20" },
michael@0 498 { 0x0000003F, "\x3F" },
michael@0 499 { 0x00000040, "\x40" },
michael@0 500 { 0x0000007F, "\x7F" },
michael@0 501
michael@0 502 { 0x00000080, "\xC2\x80" },
michael@0 503 { 0x00000081, "\xC2\x81" },
michael@0 504 { 0x00000082, "\xC2\x82" },
michael@0 505 { 0x00000084, "\xC2\x84" },
michael@0 506 { 0x00000088, "\xC2\x88" },
michael@0 507 { 0x00000090, "\xC2\x90" },
michael@0 508 { 0x000000A0, "\xC2\xA0" },
michael@0 509 { 0x000000C0, "\xC3\x80" },
michael@0 510 { 0x000000FF, "\xC3\xBF" },
michael@0 511 { 0x00000100, "\xC4\x80" },
michael@0 512 { 0x00000101, "\xC4\x81" },
michael@0 513 { 0x00000102, "\xC4\x82" },
michael@0 514 { 0x00000104, "\xC4\x84" },
michael@0 515 { 0x00000108, "\xC4\x88" },
michael@0 516 { 0x00000110, "\xC4\x90" },
michael@0 517 { 0x00000120, "\xC4\xA0" },
michael@0 518 { 0x00000140, "\xC5\x80" },
michael@0 519 { 0x00000180, "\xC6\x80" },
michael@0 520 { 0x000001FF, "\xC7\xBF" },
michael@0 521 { 0x00000200, "\xC8\x80" },
michael@0 522 { 0x00000201, "\xC8\x81" },
michael@0 523 { 0x00000202, "\xC8\x82" },
michael@0 524 { 0x00000204, "\xC8\x84" },
michael@0 525 { 0x00000208, "\xC8\x88" },
michael@0 526 { 0x00000210, "\xC8\x90" },
michael@0 527 { 0x00000220, "\xC8\xA0" },
michael@0 528 { 0x00000240, "\xC9\x80" },
michael@0 529 { 0x00000280, "\xCA\x80" },
michael@0 530 { 0x00000300, "\xCC\x80" },
michael@0 531 { 0x000003FF, "\xCF\xBF" },
michael@0 532 { 0x00000400, "\xD0\x80" },
michael@0 533 { 0x00000401, "\xD0\x81" },
michael@0 534 { 0x00000402, "\xD0\x82" },
michael@0 535 { 0x00000404, "\xD0\x84" },
michael@0 536 { 0x00000408, "\xD0\x88" },
michael@0 537 { 0x00000410, "\xD0\x90" },
michael@0 538 { 0x00000420, "\xD0\xA0" },
michael@0 539 { 0x00000440, "\xD1\x80" },
michael@0 540 { 0x00000480, "\xD2\x80" },
michael@0 541 { 0x00000500, "\xD4\x80" },
michael@0 542 { 0x00000600, "\xD8\x80" },
michael@0 543 { 0x000007FF, "\xDF\xBF" },
michael@0 544
michael@0 545 { 0x00000800, "\xE0\xA0\x80" },
michael@0 546 { 0x00000801, "\xE0\xA0\x81" },
michael@0 547 { 0x00000802, "\xE0\xA0\x82" },
michael@0 548 { 0x00000804, "\xE0\xA0\x84" },
michael@0 549 { 0x00000808, "\xE0\xA0\x88" },
michael@0 550 { 0x00000810, "\xE0\xA0\x90" },
michael@0 551 { 0x00000820, "\xE0\xA0\xA0" },
michael@0 552 { 0x00000840, "\xE0\xA1\x80" },
michael@0 553 { 0x00000880, "\xE0\xA2\x80" },
michael@0 554 { 0x00000900, "\xE0\xA4\x80" },
michael@0 555 { 0x00000A00, "\xE0\xA8\x80" },
michael@0 556 { 0x00000C00, "\xE0\xB0\x80" },
michael@0 557 { 0x00000FFF, "\xE0\xBF\xBF" },
michael@0 558 { 0x00001000, "\xE1\x80\x80" },
michael@0 559 { 0x00001001, "\xE1\x80\x81" },
michael@0 560 { 0x00001002, "\xE1\x80\x82" },
michael@0 561 { 0x00001004, "\xE1\x80\x84" },
michael@0 562 { 0x00001008, "\xE1\x80\x88" },
michael@0 563 { 0x00001010, "\xE1\x80\x90" },
michael@0 564 { 0x00001020, "\xE1\x80\xA0" },
michael@0 565 { 0x00001040, "\xE1\x81\x80" },
michael@0 566 { 0x00001080, "\xE1\x82\x80" },
michael@0 567 { 0x00001100, "\xE1\x84\x80" },
michael@0 568 { 0x00001200, "\xE1\x88\x80" },
michael@0 569 { 0x00001400, "\xE1\x90\x80" },
michael@0 570 { 0x00001800, "\xE1\xA0\x80" },
michael@0 571 { 0x00001FFF, "\xE1\xBF\xBF" },
michael@0 572 { 0x00002000, "\xE2\x80\x80" },
michael@0 573 { 0x00002001, "\xE2\x80\x81" },
michael@0 574 { 0x00002002, "\xE2\x80\x82" },
michael@0 575 { 0x00002004, "\xE2\x80\x84" },
michael@0 576 { 0x00002008, "\xE2\x80\x88" },
michael@0 577 { 0x00002010, "\xE2\x80\x90" },
michael@0 578 { 0x00002020, "\xE2\x80\xA0" },
michael@0 579 { 0x00002040, "\xE2\x81\x80" },
michael@0 580 { 0x00002080, "\xE2\x82\x80" },
michael@0 581 { 0x00002100, "\xE2\x84\x80" },
michael@0 582 { 0x00002200, "\xE2\x88\x80" },
michael@0 583 { 0x00002400, "\xE2\x90\x80" },
michael@0 584 { 0x00002800, "\xE2\xA0\x80" },
michael@0 585 { 0x00003000, "\xE3\x80\x80" },
michael@0 586 { 0x00003FFF, "\xE3\xBF\xBF" },
michael@0 587 { 0x00004000, "\xE4\x80\x80" },
michael@0 588 { 0x00004001, "\xE4\x80\x81" },
michael@0 589 { 0x00004002, "\xE4\x80\x82" },
michael@0 590 { 0x00004004, "\xE4\x80\x84" },
michael@0 591 { 0x00004008, "\xE4\x80\x88" },
michael@0 592 { 0x00004010, "\xE4\x80\x90" },
michael@0 593 { 0x00004020, "\xE4\x80\xA0" },
michael@0 594 { 0x00004040, "\xE4\x81\x80" },
michael@0 595 { 0x00004080, "\xE4\x82\x80" },
michael@0 596 { 0x00004100, "\xE4\x84\x80" },
michael@0 597 { 0x00004200, "\xE4\x88\x80" },
michael@0 598 { 0x00004400, "\xE4\x90\x80" },
michael@0 599 { 0x00004800, "\xE4\xA0\x80" },
michael@0 600 { 0x00005000, "\xE5\x80\x80" },
michael@0 601 { 0x00006000, "\xE6\x80\x80" },
michael@0 602 { 0x00007FFF, "\xE7\xBF\xBF" },
michael@0 603 { 0x00008000, "\xE8\x80\x80" },
michael@0 604 { 0x00008001, "\xE8\x80\x81" },
michael@0 605 { 0x00008002, "\xE8\x80\x82" },
michael@0 606 { 0x00008004, "\xE8\x80\x84" },
michael@0 607 { 0x00008008, "\xE8\x80\x88" },
michael@0 608 { 0x00008010, "\xE8\x80\x90" },
michael@0 609 { 0x00008020, "\xE8\x80\xA0" },
michael@0 610 { 0x00008040, "\xE8\x81\x80" },
michael@0 611 { 0x00008080, "\xE8\x82\x80" },
michael@0 612 { 0x00008100, "\xE8\x84\x80" },
michael@0 613 { 0x00008200, "\xE8\x88\x80" },
michael@0 614 { 0x00008400, "\xE8\x90\x80" },
michael@0 615 { 0x00008800, "\xE8\xA0\x80" },
michael@0 616 { 0x00009000, "\xE9\x80\x80" },
michael@0 617 { 0x0000A000, "\xEA\x80\x80" },
michael@0 618 { 0x0000C000, "\xEC\x80\x80" },
michael@0 619 { 0x0000FFFF, "\xEF\xBF\xBF" },
michael@0 620
michael@0 621 { 0x00010000, "\xF0\x90\x80\x80" },
michael@0 622 { 0x00010001, "\xF0\x90\x80\x81" },
michael@0 623 { 0x00010002, "\xF0\x90\x80\x82" },
michael@0 624 { 0x00010004, "\xF0\x90\x80\x84" },
michael@0 625 { 0x00010008, "\xF0\x90\x80\x88" },
michael@0 626 { 0x00010010, "\xF0\x90\x80\x90" },
michael@0 627 { 0x00010020, "\xF0\x90\x80\xA0" },
michael@0 628 { 0x00010040, "\xF0\x90\x81\x80" },
michael@0 629 { 0x00010080, "\xF0\x90\x82\x80" },
michael@0 630 { 0x00010100, "\xF0\x90\x84\x80" },
michael@0 631 { 0x00010200, "\xF0\x90\x88\x80" },
michael@0 632 { 0x00010400, "\xF0\x90\x90\x80" },
michael@0 633 { 0x00010800, "\xF0\x90\xA0\x80" },
michael@0 634 { 0x00011000, "\xF0\x91\x80\x80" },
michael@0 635 { 0x00012000, "\xF0\x92\x80\x80" },
michael@0 636 { 0x00014000, "\xF0\x94\x80\x80" },
michael@0 637 { 0x00018000, "\xF0\x98\x80\x80" },
michael@0 638 { 0x0001FFFF, "\xF0\x9F\xBF\xBF" },
michael@0 639 { 0x00020000, "\xF0\xA0\x80\x80" },
michael@0 640 { 0x00020001, "\xF0\xA0\x80\x81" },
michael@0 641 { 0x00020002, "\xF0\xA0\x80\x82" },
michael@0 642 { 0x00020004, "\xF0\xA0\x80\x84" },
michael@0 643 { 0x00020008, "\xF0\xA0\x80\x88" },
michael@0 644 { 0x00020010, "\xF0\xA0\x80\x90" },
michael@0 645 { 0x00020020, "\xF0\xA0\x80\xA0" },
michael@0 646 { 0x00020040, "\xF0\xA0\x81\x80" },
michael@0 647 { 0x00020080, "\xF0\xA0\x82\x80" },
michael@0 648 { 0x00020100, "\xF0\xA0\x84\x80" },
michael@0 649 { 0x00020200, "\xF0\xA0\x88\x80" },
michael@0 650 { 0x00020400, "\xF0\xA0\x90\x80" },
michael@0 651 { 0x00020800, "\xF0\xA0\xA0\x80" },
michael@0 652 { 0x00021000, "\xF0\xA1\x80\x80" },
michael@0 653 { 0x00022000, "\xF0\xA2\x80\x80" },
michael@0 654 { 0x00024000, "\xF0\xA4\x80\x80" },
michael@0 655 { 0x00028000, "\xF0\xA8\x80\x80" },
michael@0 656 { 0x00030000, "\xF0\xB0\x80\x80" },
michael@0 657 { 0x0003FFFF, "\xF0\xBF\xBF\xBF" },
michael@0 658 { 0x00040000, "\xF1\x80\x80\x80" },
michael@0 659 { 0x00040001, "\xF1\x80\x80\x81" },
michael@0 660 { 0x00040002, "\xF1\x80\x80\x82" },
michael@0 661 { 0x00040004, "\xF1\x80\x80\x84" },
michael@0 662 { 0x00040008, "\xF1\x80\x80\x88" },
michael@0 663 { 0x00040010, "\xF1\x80\x80\x90" },
michael@0 664 { 0x00040020, "\xF1\x80\x80\xA0" },
michael@0 665 { 0x00040040, "\xF1\x80\x81\x80" },
michael@0 666 { 0x00040080, "\xF1\x80\x82\x80" },
michael@0 667 { 0x00040100, "\xF1\x80\x84\x80" },
michael@0 668 { 0x00040200, "\xF1\x80\x88\x80" },
michael@0 669 { 0x00040400, "\xF1\x80\x90\x80" },
michael@0 670 { 0x00040800, "\xF1\x80\xA0\x80" },
michael@0 671 { 0x00041000, "\xF1\x81\x80\x80" },
michael@0 672 { 0x00042000, "\xF1\x82\x80\x80" },
michael@0 673 { 0x00044000, "\xF1\x84\x80\x80" },
michael@0 674 { 0x00048000, "\xF1\x88\x80\x80" },
michael@0 675 { 0x00050000, "\xF1\x90\x80\x80" },
michael@0 676 { 0x00060000, "\xF1\xA0\x80\x80" },
michael@0 677 { 0x0007FFFF, "\xF1\xBF\xBF\xBF" },
michael@0 678 { 0x00080000, "\xF2\x80\x80\x80" },
michael@0 679 { 0x00080001, "\xF2\x80\x80\x81" },
michael@0 680 { 0x00080002, "\xF2\x80\x80\x82" },
michael@0 681 { 0x00080004, "\xF2\x80\x80\x84" },
michael@0 682 { 0x00080008, "\xF2\x80\x80\x88" },
michael@0 683 { 0x00080010, "\xF2\x80\x80\x90" },
michael@0 684 { 0x00080020, "\xF2\x80\x80\xA0" },
michael@0 685 { 0x00080040, "\xF2\x80\x81\x80" },
michael@0 686 { 0x00080080, "\xF2\x80\x82\x80" },
michael@0 687 { 0x00080100, "\xF2\x80\x84\x80" },
michael@0 688 { 0x00080200, "\xF2\x80\x88\x80" },
michael@0 689 { 0x00080400, "\xF2\x80\x90\x80" },
michael@0 690 { 0x00080800, "\xF2\x80\xA0\x80" },
michael@0 691 { 0x00081000, "\xF2\x81\x80\x80" },
michael@0 692 { 0x00082000, "\xF2\x82\x80\x80" },
michael@0 693 { 0x00084000, "\xF2\x84\x80\x80" },
michael@0 694 { 0x00088000, "\xF2\x88\x80\x80" },
michael@0 695 { 0x00090000, "\xF2\x90\x80\x80" },
michael@0 696 { 0x000A0000, "\xF2\xA0\x80\x80" },
michael@0 697 { 0x000C0000, "\xF3\x80\x80\x80" },
michael@0 698 { 0x000FFFFF, "\xF3\xBF\xBF\xBF" },
michael@0 699 { 0x00100000, "\xF4\x80\x80\x80" },
michael@0 700 { 0x00100001, "\xF4\x80\x80\x81" },
michael@0 701 { 0x00100002, "\xF4\x80\x80\x82" },
michael@0 702 { 0x00100004, "\xF4\x80\x80\x84" },
michael@0 703 { 0x00100008, "\xF4\x80\x80\x88" },
michael@0 704 { 0x00100010, "\xF4\x80\x80\x90" },
michael@0 705 { 0x00100020, "\xF4\x80\x80\xA0" },
michael@0 706 { 0x00100040, "\xF4\x80\x81\x80" },
michael@0 707 { 0x00100080, "\xF4\x80\x82\x80" },
michael@0 708 { 0x00100100, "\xF4\x80\x84\x80" },
michael@0 709 { 0x00100200, "\xF4\x80\x88\x80" },
michael@0 710 { 0x00100400, "\xF4\x80\x90\x80" },
michael@0 711 { 0x00100800, "\xF4\x80\xA0\x80" },
michael@0 712 { 0x00101000, "\xF4\x81\x80\x80" },
michael@0 713 { 0x00102000, "\xF4\x82\x80\x80" },
michael@0 714 { 0x00104000, "\xF4\x84\x80\x80" },
michael@0 715 { 0x00108000, "\xF4\x88\x80\x80" },
michael@0 716 { 0x0010FFFF, "\xF4\x8F\xBF\xBF" },
michael@0 717 };
michael@0 718
michael@0 719 /*
michael@0 720 * UCS-2 vectors: each entry pairs a 16-bit value (0x0001 through 0xFFFF) with its UTF-8 byte sequence (1 to 3 bytes).
michael@0 721 */
michael@0 722
michael@0 723 struct ucs2 ucs2[] = {
michael@0 724 { 0x0001, "\x01" },
michael@0 725 { 0x0002, "\x02" },
michael@0 726 { 0x0003, "\x03" },
michael@0 727 { 0x0004, "\x04" },
michael@0 728 { 0x0007, "\x07" },
michael@0 729 { 0x0008, "\x08" },
michael@0 730 { 0x000F, "\x0F" },
michael@0 731 { 0x0010, "\x10" },
michael@0 732 { 0x001F, "\x1F" },
michael@0 733 { 0x0020, "\x20" },
michael@0 734 { 0x003F, "\x3F" },
michael@0 735 { 0x0040, "\x40" },
michael@0 736 { 0x007F, "\x7F" },
michael@0 737
michael@0 738 { 0x0080, "\xC2\x80" },
michael@0 739 { 0x0081, "\xC2\x81" },
michael@0 740 { 0x0082, "\xC2\x82" },
michael@0 741 { 0x0084, "\xC2\x84" },
michael@0 742 { 0x0088, "\xC2\x88" },
michael@0 743 { 0x0090, "\xC2\x90" },
michael@0 744 { 0x00A0, "\xC2\xA0" },
michael@0 745 { 0x00C0, "\xC3\x80" },
michael@0 746 { 0x00FF, "\xC3\xBF" },
michael@0 747 { 0x0100, "\xC4\x80" },
michael@0 748 { 0x0101, "\xC4\x81" },
michael@0 749 { 0x0102, "\xC4\x82" },
michael@0 750 { 0x0104, "\xC4\x84" },
michael@0 751 { 0x0108, "\xC4\x88" },
michael@0 752 { 0x0110, "\xC4\x90" },
michael@0 753 { 0x0120, "\xC4\xA0" },
michael@0 754 { 0x0140, "\xC5\x80" },
michael@0 755 { 0x0180, "\xC6\x80" },
michael@0 756 { 0x01FF, "\xC7\xBF" },
michael@0 757 { 0x0200, "\xC8\x80" },
michael@0 758 { 0x0201, "\xC8\x81" },
michael@0 759 { 0x0202, "\xC8\x82" },
michael@0 760 { 0x0204, "\xC8\x84" },
michael@0 761 { 0x0208, "\xC8\x88" },
michael@0 762 { 0x0210, "\xC8\x90" },
michael@0 763 { 0x0220, "\xC8\xA0" },
michael@0 764 { 0x0240, "\xC9\x80" },
michael@0 765 { 0x0280, "\xCA\x80" },
michael@0 766 { 0x0300, "\xCC\x80" },
michael@0 767 { 0x03FF, "\xCF\xBF" },
michael@0 768 { 0x0400, "\xD0\x80" },
michael@0 769 { 0x0401, "\xD0\x81" },
michael@0 770 { 0x0402, "\xD0\x82" },
michael@0 771 { 0x0404, "\xD0\x84" },
michael@0 772 { 0x0408, "\xD0\x88" },
michael@0 773 { 0x0410, "\xD0\x90" },
michael@0 774 { 0x0420, "\xD0\xA0" },
michael@0 775 { 0x0440, "\xD1\x80" },
michael@0 776 { 0x0480, "\xD2\x80" },
michael@0 777 { 0x0500, "\xD4\x80" },
michael@0 778 { 0x0600, "\xD8\x80" },
michael@0 779 { 0x07FF, "\xDF\xBF" },
michael@0 780
michael@0 781 { 0x0800, "\xE0\xA0\x80" },
michael@0 782 { 0x0801, "\xE0\xA0\x81" },
michael@0 783 { 0x0802, "\xE0\xA0\x82" },
michael@0 784 { 0x0804, "\xE0\xA0\x84" },
michael@0 785 { 0x0808, "\xE0\xA0\x88" },
michael@0 786 { 0x0810, "\xE0\xA0\x90" },
michael@0 787 { 0x0820, "\xE0\xA0\xA0" },
michael@0 788 { 0x0840, "\xE0\xA1\x80" },
michael@0 789 { 0x0880, "\xE0\xA2\x80" },
michael@0 790 { 0x0900, "\xE0\xA4\x80" },
michael@0 791 { 0x0A00, "\xE0\xA8\x80" },
michael@0 792 { 0x0C00, "\xE0\xB0\x80" },
michael@0 793 { 0x0FFF, "\xE0\xBF\xBF" },
michael@0 794 { 0x1000, "\xE1\x80\x80" },
michael@0 795 { 0x1001, "\xE1\x80\x81" },
michael@0 796 { 0x1002, "\xE1\x80\x82" },
michael@0 797 { 0x1004, "\xE1\x80\x84" },
michael@0 798 { 0x1008, "\xE1\x80\x88" },
michael@0 799 { 0x1010, "\xE1\x80\x90" },
michael@0 800 { 0x1020, "\xE1\x80\xA0" },
michael@0 801 { 0x1040, "\xE1\x81\x80" },
michael@0 802 { 0x1080, "\xE1\x82\x80" },
michael@0 803 { 0x1100, "\xE1\x84\x80" },
michael@0 804 { 0x1200, "\xE1\x88\x80" },
michael@0 805 { 0x1400, "\xE1\x90\x80" },
michael@0 806 { 0x1800, "\xE1\xA0\x80" },
michael@0 807 { 0x1FFF, "\xE1\xBF\xBF" },
michael@0 808 { 0x2000, "\xE2\x80\x80" },
michael@0 809 { 0x2001, "\xE2\x80\x81" },
michael@0 810 { 0x2002, "\xE2\x80\x82" },
michael@0 811 { 0x2004, "\xE2\x80\x84" },
michael@0 812 { 0x2008, "\xE2\x80\x88" },
michael@0 813 { 0x2010, "\xE2\x80\x90" },
michael@0 814 { 0x2020, "\xE2\x80\xA0" },
michael@0 815 { 0x2040, "\xE2\x81\x80" },
michael@0 816 { 0x2080, "\xE2\x82\x80" },
michael@0 817 { 0x2100, "\xE2\x84\x80" },
michael@0 818 { 0x2200, "\xE2\x88\x80" },
michael@0 819 { 0x2400, "\xE2\x90\x80" },
michael@0 820 { 0x2800, "\xE2\xA0\x80" },
michael@0 821 { 0x3000, "\xE3\x80\x80" },
michael@0 822 { 0x3FFF, "\xE3\xBF\xBF" },
michael@0 823 { 0x4000, "\xE4\x80\x80" },
michael@0 824 { 0x4001, "\xE4\x80\x81" },
michael@0 825 { 0x4002, "\xE4\x80\x82" },
michael@0 826 { 0x4004, "\xE4\x80\x84" },
michael@0 827 { 0x4008, "\xE4\x80\x88" },
michael@0 828 { 0x4010, "\xE4\x80\x90" },
michael@0 829 { 0x4020, "\xE4\x80\xA0" },
michael@0 830 { 0x4040, "\xE4\x81\x80" },
michael@0 831 { 0x4080, "\xE4\x82\x80" },
michael@0 832 { 0x4100, "\xE4\x84\x80" },
michael@0 833 { 0x4200, "\xE4\x88\x80" },
michael@0 834 { 0x4400, "\xE4\x90\x80" },
michael@0 835 { 0x4800, "\xE4\xA0\x80" },
michael@0 836 { 0x5000, "\xE5\x80\x80" },
michael@0 837 { 0x6000, "\xE6\x80\x80" },
michael@0 838 { 0x7FFF, "\xE7\xBF\xBF" },
michael@0 839 { 0x8000, "\xE8\x80\x80" },
michael@0 840 { 0x8001, "\xE8\x80\x81" },
michael@0 841 { 0x8002, "\xE8\x80\x82" },
michael@0 842 { 0x8004, "\xE8\x80\x84" },
michael@0 843 { 0x8008, "\xE8\x80\x88" },
michael@0 844 { 0x8010, "\xE8\x80\x90" },
michael@0 845 { 0x8020, "\xE8\x80\xA0" },
michael@0 846 { 0x8040, "\xE8\x81\x80" },
michael@0 847 { 0x8080, "\xE8\x82\x80" },
michael@0 848 { 0x8100, "\xE8\x84\x80" },
michael@0 849 { 0x8200, "\xE8\x88\x80" },
michael@0 850 { 0x8400, "\xE8\x90\x80" },
michael@0 851 { 0x8800, "\xE8\xA0\x80" },
michael@0 852 { 0x9000, "\xE9\x80\x80" },
michael@0 853 { 0xA000, "\xEA\x80\x80" },
michael@0 854 { 0xC000, "\xEC\x80\x80" },
michael@0 855 { 0xFFFF, "\xEF\xBF\xBF" }
michael@0 856
michael@0 857 };
michael@0 858
michael@0 859 /*
michael@0 860 * UTF-16 vectors
michael@0 861 */
michael@0 862
michael@0 863 struct utf16 utf16[] = {
michael@0 864 { 0x00010000, { 0xD800, 0xDC00 } },
michael@0 865 { 0x00010001, { 0xD800, 0xDC01 } },
michael@0 866 { 0x00010002, { 0xD800, 0xDC02 } },
michael@0 867 { 0x00010003, { 0xD800, 0xDC03 } },
michael@0 868 { 0x00010004, { 0xD800, 0xDC04 } },
michael@0 869 { 0x00010007, { 0xD800, 0xDC07 } },
michael@0 870 { 0x00010008, { 0xD800, 0xDC08 } },
michael@0 871 { 0x0001000F, { 0xD800, 0xDC0F } },
michael@0 872 { 0x00010010, { 0xD800, 0xDC10 } },
michael@0 873 { 0x0001001F, { 0xD800, 0xDC1F } },
michael@0 874 { 0x00010020, { 0xD800, 0xDC20 } },
michael@0 875 { 0x0001003F, { 0xD800, 0xDC3F } },
michael@0 876 { 0x00010040, { 0xD800, 0xDC40 } },
michael@0 877 { 0x0001007F, { 0xD800, 0xDC7F } },
michael@0 878 { 0x00010080, { 0xD800, 0xDC80 } },
michael@0 879 { 0x00010081, { 0xD800, 0xDC81 } },
michael@0 880 { 0x00010082, { 0xD800, 0xDC82 } },
michael@0 881 { 0x00010084, { 0xD800, 0xDC84 } },
michael@0 882 { 0x00010088, { 0xD800, 0xDC88 } },
michael@0 883 { 0x00010090, { 0xD800, 0xDC90 } },
michael@0 884 { 0x000100A0, { 0xD800, 0xDCA0 } },
michael@0 885 { 0x000100C0, { 0xD800, 0xDCC0 } },
michael@0 886 { 0x000100FF, { 0xD800, 0xDCFF } },
michael@0 887 { 0x00010100, { 0xD800, 0xDD00 } },
michael@0 888 { 0x00010101, { 0xD800, 0xDD01 } },
michael@0 889 { 0x00010102, { 0xD800, 0xDD02 } },
michael@0 890 { 0x00010104, { 0xD800, 0xDD04 } },
michael@0 891 { 0x00010108, { 0xD800, 0xDD08 } },
michael@0 892 { 0x00010110, { 0xD800, 0xDD10 } },
michael@0 893 { 0x00010120, { 0xD800, 0xDD20 } },
michael@0 894 { 0x00010140, { 0xD800, 0xDD40 } },
michael@0 895 { 0x00010180, { 0xD800, 0xDD80 } },
michael@0 896 { 0x000101FF, { 0xD800, 0xDDFF } },
michael@0 897 { 0x00010200, { 0xD800, 0xDE00 } },
michael@0 898 { 0x00010201, { 0xD800, 0xDE01 } },
michael@0 899 { 0x00010202, { 0xD800, 0xDE02 } },
michael@0 900 { 0x00010204, { 0xD800, 0xDE04 } },
michael@0 901 { 0x00010208, { 0xD800, 0xDE08 } },
michael@0 902 { 0x00010210, { 0xD800, 0xDE10 } },
michael@0 903 { 0x00010220, { 0xD800, 0xDE20 } },
michael@0 904 { 0x00010240, { 0xD800, 0xDE40 } },
michael@0 905 { 0x00010280, { 0xD800, 0xDE80 } },
michael@0 906 { 0x00010300, { 0xD800, 0xDF00 } },
michael@0 907 { 0x000103FF, { 0xD800, 0xDFFF } },
michael@0 908 { 0x00010400, { 0xD801, 0xDC00 } },
michael@0 909 { 0x00010401, { 0xD801, 0xDC01 } },
michael@0 910 { 0x00010402, { 0xD801, 0xDC02 } },
michael@0 911 { 0x00010404, { 0xD801, 0xDC04 } },
michael@0 912 { 0x00010408, { 0xD801, 0xDC08 } },
michael@0 913 { 0x00010410, { 0xD801, 0xDC10 } },
michael@0 914 { 0x00010420, { 0xD801, 0xDC20 } },
michael@0 915 { 0x00010440, { 0xD801, 0xDC40 } },
michael@0 916 { 0x00010480, { 0xD801, 0xDC80 } },
michael@0 917 { 0x00010500, { 0xD801, 0xDD00 } },
michael@0 918 { 0x00010600, { 0xD801, 0xDE00 } },
michael@0 919 { 0x000107FF, { 0xD801, 0xDFFF } },
michael@0 920 { 0x00010800, { 0xD802, 0xDC00 } },
michael@0 921 { 0x00010801, { 0xD802, 0xDC01 } },
michael@0 922 { 0x00010802, { 0xD802, 0xDC02 } },
michael@0 923 { 0x00010804, { 0xD802, 0xDC04 } },
michael@0 924 { 0x00010808, { 0xD802, 0xDC08 } },
michael@0 925 { 0x00010810, { 0xD802, 0xDC10 } },
michael@0 926 { 0x00010820, { 0xD802, 0xDC20 } },
michael@0 927 { 0x00010840, { 0xD802, 0xDC40 } },
michael@0 928 { 0x00010880, { 0xD802, 0xDC80 } },
michael@0 929 { 0x00010900, { 0xD802, 0xDD00 } },
michael@0 930 { 0x00010A00, { 0xD802, 0xDE00 } },
michael@0 931 { 0x00010C00, { 0xD803, 0xDC00 } },
michael@0 932 { 0x00010FFF, { 0xD803, 0xDFFF } },
michael@0 933 { 0x00011000, { 0xD804, 0xDC00 } },
michael@0 934 { 0x00011001, { 0xD804, 0xDC01 } },
michael@0 935 { 0x00011002, { 0xD804, 0xDC02 } },
michael@0 936 { 0x00011004, { 0xD804, 0xDC04 } },
michael@0 937 { 0x00011008, { 0xD804, 0xDC08 } },
michael@0 938 { 0x00011010, { 0xD804, 0xDC10 } },
michael@0 939 { 0x00011020, { 0xD804, 0xDC20 } },
michael@0 940 { 0x00011040, { 0xD804, 0xDC40 } },
michael@0 941 { 0x00011080, { 0xD804, 0xDC80 } },
michael@0 942 { 0x00011100, { 0xD804, 0xDD00 } },
michael@0 943 { 0x00011200, { 0xD804, 0xDE00 } },
michael@0 944 { 0x00011400, { 0xD805, 0xDC00 } },
michael@0 945 { 0x00011800, { 0xD806, 0xDC00 } },
michael@0 946 { 0x00011FFF, { 0xD807, 0xDFFF } },
michael@0 947 { 0x00012000, { 0xD808, 0xDC00 } },
michael@0 948 { 0x00012001, { 0xD808, 0xDC01 } },
michael@0 949 { 0x00012002, { 0xD808, 0xDC02 } },
michael@0 950 { 0x00012004, { 0xD808, 0xDC04 } },
michael@0 951 { 0x00012008, { 0xD808, 0xDC08 } },
michael@0 952 { 0x00012010, { 0xD808, 0xDC10 } },
michael@0 953 { 0x00012020, { 0xD808, 0xDC20 } },
michael@0 954 { 0x00012040, { 0xD808, 0xDC40 } },
michael@0 955 { 0x00012080, { 0xD808, 0xDC80 } },
michael@0 956 { 0x00012100, { 0xD808, 0xDD00 } },
michael@0 957 { 0x00012200, { 0xD808, 0xDE00 } },
michael@0 958 { 0x00012400, { 0xD809, 0xDC00 } },
michael@0 959 { 0x00012800, { 0xD80A, 0xDC00 } },
michael@0 960 { 0x00013000, { 0xD80C, 0xDC00 } },
michael@0 961 { 0x00013FFF, { 0xD80F, 0xDFFF } },
michael@0 962 { 0x00014000, { 0xD810, 0xDC00 } },
michael@0 963 { 0x00014001, { 0xD810, 0xDC01 } },
michael@0 964 { 0x00014002, { 0xD810, 0xDC02 } },
michael@0 965 { 0x00014004, { 0xD810, 0xDC04 } },
michael@0 966 { 0x00014008, { 0xD810, 0xDC08 } },
michael@0 967 { 0x00014010, { 0xD810, 0xDC10 } },
michael@0 968 { 0x00014020, { 0xD810, 0xDC20 } },
michael@0 969 { 0x00014040, { 0xD810, 0xDC40 } },
michael@0 970 { 0x00014080, { 0xD810, 0xDC80 } },
michael@0 971 { 0x00014100, { 0xD810, 0xDD00 } },
michael@0 972 { 0x00014200, { 0xD810, 0xDE00 } },
michael@0 973 { 0x00014400, { 0xD811, 0xDC00 } },
michael@0 974 { 0x00014800, { 0xD812, 0xDC00 } },
michael@0 975 { 0x00015000, { 0xD814, 0xDC00 } },
michael@0 976 { 0x00016000, { 0xD818, 0xDC00 } },
michael@0 977 { 0x00017FFF, { 0xD81F, 0xDFFF } },
michael@0 978 { 0x00018000, { 0xD820, 0xDC00 } },
michael@0 979 { 0x00018001, { 0xD820, 0xDC01 } },
michael@0 980 { 0x00018002, { 0xD820, 0xDC02 } },
michael@0 981 { 0x00018004, { 0xD820, 0xDC04 } },
michael@0 982 { 0x00018008, { 0xD820, 0xDC08 } },
michael@0 983 { 0x00018010, { 0xD820, 0xDC10 } },
michael@0 984 { 0x00018020, { 0xD820, 0xDC20 } },
michael@0 985 { 0x00018040, { 0xD820, 0xDC40 } },
michael@0 986 { 0x00018080, { 0xD820, 0xDC80 } },
michael@0 987 { 0x00018100, { 0xD820, 0xDD00 } },
michael@0 988 { 0x00018200, { 0xD820, 0xDE00 } },
michael@0 989 { 0x00018400, { 0xD821, 0xDC00 } },
michael@0 990 { 0x00018800, { 0xD822, 0xDC00 } },
michael@0 991 { 0x00019000, { 0xD824, 0xDC00 } },
michael@0 992 { 0x0001A000, { 0xD828, 0xDC00 } },
michael@0 993 { 0x0001C000, { 0xD830, 0xDC00 } },
michael@0 994 { 0x0001FFFF, { 0xD83F, 0xDFFF } },
michael@0 995 { 0x00020000, { 0xD840, 0xDC00 } },
michael@0 996 { 0x00020001, { 0xD840, 0xDC01 } },
michael@0 997 { 0x00020002, { 0xD840, 0xDC02 } },
michael@0 998 { 0x00020004, { 0xD840, 0xDC04 } },
michael@0 999 { 0x00020008, { 0xD840, 0xDC08 } },
michael@0 1000 { 0x00020010, { 0xD840, 0xDC10 } },
michael@0 1001 { 0x00020020, { 0xD840, 0xDC20 } },
michael@0 1002 { 0x00020040, { 0xD840, 0xDC40 } },
michael@0 1003 { 0x00020080, { 0xD840, 0xDC80 } },
michael@0 1004 { 0x00020100, { 0xD840, 0xDD00 } },
michael@0 1005 { 0x00020200, { 0xD840, 0xDE00 } },
michael@0 1006 { 0x00020400, { 0xD841, 0xDC00 } },
michael@0 1007 { 0x00020800, { 0xD842, 0xDC00 } },
michael@0 1008 { 0x00021000, { 0xD844, 0xDC00 } },
michael@0 1009 { 0x00022000, { 0xD848, 0xDC00 } },
michael@0 1010 { 0x00024000, { 0xD850, 0xDC00 } },
michael@0 1011 { 0x00028000, { 0xD860, 0xDC00 } },
michael@0 1012 { 0x0002FFFF, { 0xD87F, 0xDFFF } },
michael@0 1013 { 0x00030000, { 0xD880, 0xDC00 } },
michael@0 1014 { 0x00030001, { 0xD880, 0xDC01 } },
michael@0 1015 { 0x00030002, { 0xD880, 0xDC02 } },
michael@0 1016 { 0x00030004, { 0xD880, 0xDC04 } },
michael@0 1017 { 0x00030008, { 0xD880, 0xDC08 } },
michael@0 1018 { 0x00030010, { 0xD880, 0xDC10 } },
michael@0 1019 { 0x00030020, { 0xD880, 0xDC20 } },
michael@0 1020 { 0x00030040, { 0xD880, 0xDC40 } },
michael@0 1021 { 0x00030080, { 0xD880, 0xDC80 } },
michael@0 1022 { 0x00030100, { 0xD880, 0xDD00 } },
michael@0 1023 { 0x00030200, { 0xD880, 0xDE00 } },
michael@0 1024 { 0x00030400, { 0xD881, 0xDC00 } },
michael@0 1025 { 0x00030800, { 0xD882, 0xDC00 } },
michael@0 1026 { 0x00031000, { 0xD884, 0xDC00 } },
michael@0 1027 { 0x00032000, { 0xD888, 0xDC00 } },
michael@0 1028 { 0x00034000, { 0xD890, 0xDC00 } },
michael@0 1029 { 0x00038000, { 0xD8A0, 0xDC00 } },
michael@0 1030 { 0x0003FFFF, { 0xD8BF, 0xDFFF } },
michael@0 1031 { 0x00040000, { 0xD8C0, 0xDC00 } },
michael@0 1032 { 0x00040001, { 0xD8C0, 0xDC01 } },
michael@0 1033 { 0x00040002, { 0xD8C0, 0xDC02 } },
michael@0 1034 { 0x00040004, { 0xD8C0, 0xDC04 } },
michael@0 1035 { 0x00040008, { 0xD8C0, 0xDC08 } },
michael@0 1036 { 0x00040010, { 0xD8C0, 0xDC10 } },
michael@0 1037 { 0x00040020, { 0xD8C0, 0xDC20 } },
michael@0 1038 { 0x00040040, { 0xD8C0, 0xDC40 } },
michael@0 1039 { 0x00040080, { 0xD8C0, 0xDC80 } },
michael@0 1040 { 0x00040100, { 0xD8C0, 0xDD00 } },
michael@0 1041 { 0x00040200, { 0xD8C0, 0xDE00 } },
michael@0 1042 { 0x00040400, { 0xD8C1, 0xDC00 } },
michael@0 1043 { 0x00040800, { 0xD8C2, 0xDC00 } },
michael@0 1044 { 0x00041000, { 0xD8C4, 0xDC00 } },
michael@0 1045 { 0x00042000, { 0xD8C8, 0xDC00 } },
michael@0 1046 { 0x00044000, { 0xD8D0, 0xDC00 } },
michael@0 1047 { 0x00048000, { 0xD8E0, 0xDC00 } },
michael@0 1048 { 0x0004FFFF, { 0xD8FF, 0xDFFF } },
michael@0 1049 { 0x00050000, { 0xD900, 0xDC00 } },
michael@0 1050 { 0x00050001, { 0xD900, 0xDC01 } },
michael@0 1051 { 0x00050002, { 0xD900, 0xDC02 } },
michael@0 1052 { 0x00050004, { 0xD900, 0xDC04 } },
michael@0 1053 { 0x00050008, { 0xD900, 0xDC08 } },
michael@0 1054 { 0x00050010, { 0xD900, 0xDC10 } },
michael@0 1055 { 0x00050020, { 0xD900, 0xDC20 } },
michael@0 1056 { 0x00050040, { 0xD900, 0xDC40 } },
michael@0 1057 { 0x00050080, { 0xD900, 0xDC80 } },
michael@0 1058 { 0x00050100, { 0xD900, 0xDD00 } },
michael@0 1059 { 0x00050200, { 0xD900, 0xDE00 } },
michael@0 1060 { 0x00050400, { 0xD901, 0xDC00 } },
michael@0 1061 { 0x00050800, { 0xD902, 0xDC00 } },
michael@0 1062 { 0x00051000, { 0xD904, 0xDC00 } },
michael@0 1063 { 0x00052000, { 0xD908, 0xDC00 } },
michael@0 1064 { 0x00054000, { 0xD910, 0xDC00 } },
michael@0 1065 { 0x00058000, { 0xD920, 0xDC00 } },
michael@0 1066 { 0x00060000, { 0xD940, 0xDC00 } },
michael@0 1067 { 0x00070000, { 0xD980, 0xDC00 } },
michael@0 1068 { 0x0007FFFF, { 0xD9BF, 0xDFFF } },
michael@0 1069 { 0x00080000, { 0xD9C0, 0xDC00 } },
michael@0 1070 { 0x00080001, { 0xD9C0, 0xDC01 } },
michael@0 1071 { 0x00080002, { 0xD9C0, 0xDC02 } },
michael@0 1072 { 0x00080004, { 0xD9C0, 0xDC04 } },
michael@0 1073 { 0x00080008, { 0xD9C0, 0xDC08 } },
michael@0 1074 { 0x00080010, { 0xD9C0, 0xDC10 } },
michael@0 1075 { 0x00080020, { 0xD9C0, 0xDC20 } },
michael@0 1076 { 0x00080040, { 0xD9C0, 0xDC40 } },
michael@0 1077 { 0x00080080, { 0xD9C0, 0xDC80 } },
michael@0 1078 { 0x00080100, { 0xD9C0, 0xDD00 } },
michael@0 1079 { 0x00080200, { 0xD9C0, 0xDE00 } },
michael@0 1080 { 0x00080400, { 0xD9C1, 0xDC00 } },
michael@0 1081 { 0x00080800, { 0xD9C2, 0xDC00 } },
michael@0 1082 { 0x00081000, { 0xD9C4, 0xDC00 } },
michael@0 1083 { 0x00082000, { 0xD9C8, 0xDC00 } },
michael@0 1084 { 0x00084000, { 0xD9D0, 0xDC00 } },
michael@0 1085 { 0x00088000, { 0xD9E0, 0xDC00 } },
michael@0 1086 { 0x0008FFFF, { 0xD9FF, 0xDFFF } },
michael@0 1087 { 0x00090000, { 0xDA00, 0xDC00 } },
michael@0 1088 { 0x00090001, { 0xDA00, 0xDC01 } },
michael@0 1089 { 0x00090002, { 0xDA00, 0xDC02 } },
michael@0 1090 { 0x00090004, { 0xDA00, 0xDC04 } },
michael@0 1091 { 0x00090008, { 0xDA00, 0xDC08 } },
michael@0 1092 { 0x00090010, { 0xDA00, 0xDC10 } },
michael@0 1093 { 0x00090020, { 0xDA00, 0xDC20 } },
michael@0 1094 { 0x00090040, { 0xDA00, 0xDC40 } },
michael@0 1095 { 0x00090080, { 0xDA00, 0xDC80 } },
michael@0 1096 { 0x00090100, { 0xDA00, 0xDD00 } },
michael@0 1097 { 0x00090200, { 0xDA00, 0xDE00 } },
michael@0 1098 { 0x00090400, { 0xDA01, 0xDC00 } },
michael@0 1099 { 0x00090800, { 0xDA02, 0xDC00 } },
michael@0 1100 { 0x00091000, { 0xDA04, 0xDC00 } },
michael@0 1101 { 0x00092000, { 0xDA08, 0xDC00 } },
michael@0 1102 { 0x00094000, { 0xDA10, 0xDC00 } },
michael@0 1103 { 0x00098000, { 0xDA20, 0xDC00 } },
michael@0 1104 { 0x000A0000, { 0xDA40, 0xDC00 } },
michael@0 1105 { 0x000B0000, { 0xDA80, 0xDC00 } },
michael@0 1106 { 0x000C0000, { 0xDAC0, 0xDC00 } },
michael@0 1107 { 0x000D0000, { 0xDB00, 0xDC00 } },
michael@0 1108 { 0x000FFFFF, { 0xDBBF, 0xDFFF } },
michael@0 1109 { 0x0010FFFF, { 0xDBFF, 0xDFFF } }
michael@0 1110
michael@0 1111 };
michael@0 1112
/*
 * Illegal UTF-8 sequences.
 *
 * test_utf8_bad_chars() feeds each string to both UTF-8 decoders and
 * expects every one of them to be rejected.  The order of the entries is
 * the order in which they are tried.
 */
char *utf8_bad[] = {
    /* two-, three- and four-byte forms whose payload fits a shorter form */
    "\xC0\x80",
    "\xC1\xBF",
    "\xE0\x80\x80",
    "\xE0\x9F\xBF",
    "\xF0\x80\x80\x80",
    "\xF0\x8F\xBF\xBF",

    /* four-byte forms whose payload exceeds 0x10FFFF */
    "\xF4\x90\x80\x80",
    "\xF7\xBF\xBF\xBF",

    /* five-byte forms (lead bytes 0xF8..0xFB) */
    "\xF8\x80\x80\x80\x80",
    "\xF8\x88\x80\x80\x80",
    "\xF8\x92\x80\x80\x80",
    "\xF8\x9F\xBF\xBF\xBF",
    "\xF8\xA0\x80\x80\x80",
    "\xF8\xA8\x80\x80\x80",
    "\xF8\xB0\x80\x80\x80",
    "\xF8\xBF\xBF\xBF\xBF",
    "\xF9\x80\x80\x80\x88",
    "\xF9\x84\x80\x80\x80",
    "\xF9\xBF\xBF\xBF\xBF",
    "\xFA\x80\x80\x80\x80",
    "\xFA\x90\x80\x80\x80",
    "\xFB\xBF\xBF\xBF\xBF",

    /* six-byte forms (lead bytes 0xFC..0xFD) */
    "\xFC\x84\x80\x80\x80\x81",
    "\xFC\x85\x80\x80\x80\x80",
    "\xFC\x86\x80\x80\x80\x80",
    "\xFC\x87\xBF\xBF\xBF\xBF",
    "\xFC\x88\xA0\x80\x80\x80",
    "\xFC\x89\x80\x80\x80\x80",
    "\xFC\x8A\x80\x80\x80\x80",
    "\xFC\x90\x80\x80\x80\x82",
    "\xFD\x80\x80\x80\x80\x80",
    "\xFD\xBF\xBF\xBF\xBF\xBF",

    /* continuation byte with no lead byte; lead byte with no continuation */
    "\x80",
    "\xC3",
    "\xC3\xC3\x80",

    /* three-byte forms whose payload lies in 0xD800..0xDFFF */
    "\xED\xA0\x80",
    "\xED\xBF\x80",
    "\xED\xBF\xBF",
    "\xED\xA0\x80\xE0\xBF\xBF",
};
michael@0 1155
/*
 * Write a labelled hex dump of a byte string to stdout.
 *
 * Prints `word` followed by a space, then every byte of `utf8` up to (not
 * including) its terminating zero byte as two hex digits and a space, and
 * finally `end`.
 */
static void
dump_utf8(char *word, unsigned char *utf8, char *end)
{
    unsigned char *cursor = utf8;

    fprintf(stdout, "%s ", word);

    while (*cursor) {
        fprintf(stdout, "%02.2x ", (unsigned int)*cursor);
        cursor++;
    }

    fprintf(stdout, "%s", end);
}
michael@0 1170
michael@0 1171 static PRBool
michael@0 1172 test_ucs4_chars
michael@0 1173 (
michael@0 1174 void
michael@0 1175 )
michael@0 1176 {
michael@0 1177 PRBool rv = PR_TRUE;
michael@0 1178 int i;
michael@0 1179
michael@0 1180 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
michael@0 1181 struct ucs4 *e = &ucs4[i];
michael@0 1182 PRBool result;
michael@0 1183 unsigned char utf8[8];
michael@0 1184 unsigned int len = 0;
michael@0 1185 PRUint32 back = 0;
michael@0 1186
michael@0 1187 (void)memset(utf8, 0, sizeof(utf8));
michael@0 1188
michael@0 1189 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
michael@0 1190 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
michael@0 1191
michael@0 1192 if( !result ) {
michael@0 1193 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c);
michael@0 1194 rv = PR_FALSE;
michael@0 1195 continue;
michael@0 1196 }
michael@0 1197
michael@0 1198 if( (len >= sizeof(utf8)) ||
michael@0 1199 (strlen(e->utf8) != len) ||
michael@0 1200 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
michael@0 1201 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c);
michael@0 1202 dump_utf8("expected", e->utf8, ", ");
michael@0 1203 dump_utf8("received", utf8, "\n");
michael@0 1204 rv = PR_FALSE;
michael@0 1205 continue;
michael@0 1206 }
michael@0 1207
michael@0 1208 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
michael@0 1209 utf8, len, (unsigned char *)&back, sizeof(back), &len);
michael@0 1210
michael@0 1211 if( !result ) {
michael@0 1212 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n");
michael@0 1213 rv = PR_FALSE;
michael@0 1214 continue;
michael@0 1215 }
michael@0 1216
michael@0 1217 if( (sizeof(back) != len) || (e->c != back) ) {
michael@0 1218 dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:");
michael@0 1219 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
michael@0 1220 rv = PR_FALSE;
michael@0 1221 continue;
michael@0 1222 }
michael@0 1223 }
michael@0 1224
michael@0 1225 return rv;
michael@0 1226 }
michael@0 1227
michael@0 1228 static PRBool
michael@0 1229 test_ucs2_chars
michael@0 1230 (
michael@0 1231 void
michael@0 1232 )
michael@0 1233 {
michael@0 1234 PRBool rv = PR_TRUE;
michael@0 1235 int i;
michael@0 1236
michael@0 1237 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
michael@0 1238 struct ucs2 *e = &ucs2[i];
michael@0 1239 PRBool result;
michael@0 1240 unsigned char utf8[8];
michael@0 1241 unsigned int len = 0;
michael@0 1242 PRUint16 back = 0;
michael@0 1243
michael@0 1244 (void)memset(utf8, 0, sizeof(utf8));
michael@0 1245
michael@0 1246 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
michael@0 1247 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
michael@0 1248
michael@0 1249 if( !result ) {
michael@0 1250 fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c);
michael@0 1251 rv = PR_FALSE;
michael@0 1252 continue;
michael@0 1253 }
michael@0 1254
michael@0 1255 if( (len >= sizeof(utf8)) ||
michael@0 1256 (strlen(e->utf8) != len) ||
michael@0 1257 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
michael@0 1258 fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c);
michael@0 1259 dump_utf8("expected", e->utf8, ", ");
michael@0 1260 dump_utf8("received", utf8, "\n");
michael@0 1261 rv = PR_FALSE;
michael@0 1262 continue;
michael@0 1263 }
michael@0 1264
michael@0 1265 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
michael@0 1266 utf8, len, (unsigned char *)&back, sizeof(back), &len);
michael@0 1267
michael@0 1268 if( !result ) {
michael@0 1269 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n");
michael@0 1270 rv = PR_FALSE;
michael@0 1271 continue;
michael@0 1272 }
michael@0 1273
michael@0 1274 if( (sizeof(back) != len) || (e->c != back) ) {
michael@0 1275 dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:");
michael@0 1276 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back);
michael@0 1277 rv = PR_FALSE;
michael@0 1278 continue;
michael@0 1279 }
michael@0 1280 }
michael@0 1281
michael@0 1282 return rv;
michael@0 1283 }
michael@0 1284
michael@0 1285 static PRBool
michael@0 1286 test_utf16_chars
michael@0 1287 (
michael@0 1288 void
michael@0 1289 )
michael@0 1290 {
michael@0 1291 PRBool rv = PR_TRUE;
michael@0 1292 int i;
michael@0 1293
michael@0 1294 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
michael@0 1295 struct utf16 *e = &utf16[i];
michael@0 1296 PRBool result;
michael@0 1297 unsigned char utf8[8];
michael@0 1298 unsigned int len = 0;
michael@0 1299 PRUint32 back32 = 0;
michael@0 1300 PRUint16 back[2];
michael@0 1301
michael@0 1302 (void)memset(utf8, 0, sizeof(utf8));
michael@0 1303
michael@0 1304 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
michael@0 1305 (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len);
michael@0 1306
michael@0 1307 if( !result ) {
michael@0 1308 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n",
michael@0 1309 e->w[0], e->w[1]);
michael@0 1310 rv = PR_FALSE;
michael@0 1311 continue;
michael@0 1312 }
michael@0 1313
michael@0 1314 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
michael@0 1315 utf8, len, (unsigned char *)&back32, sizeof(back32), &len);
michael@0 1316
michael@0 1317 if( 4 != len ) {
michael@0 1318 fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: "
michael@0 1319 "unexpected len %d\n", e->w[0], e->w[1], len);
michael@0 1320 rv = PR_FALSE;
michael@0 1321 continue;
michael@0 1322 }
michael@0 1323
michael@0 1324 utf8[len] = '\0'; /* null-terminate for printing */
michael@0 1325
michael@0 1326 if( !result ) {
michael@0 1327 dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n");
michael@0 1328 rv = PR_FALSE;
michael@0 1329 continue;
michael@0 1330 }
michael@0 1331
michael@0 1332 if( (sizeof(back32) != len) || (e->c != back32) ) {
michael@0 1333 fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ",
michael@0 1334 e->w[0], e->w[1]);
michael@0 1335 dump_utf8("to UTF-8", utf8, "and then to UCS-4: ");
michael@0 1336 if( sizeof(back32) != len ) {
michael@0 1337 fprintf(stdout, "len is %d\n", len);
michael@0 1338 } else {
michael@0 1339 fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32);
michael@0 1340 }
michael@0 1341 rv = PR_FALSE;
michael@0 1342 continue;
michael@0 1343 }
michael@0 1344
michael@0 1345 (void)memset(utf8, 0, sizeof(utf8));
michael@0 1346 back[0] = back[1] = 0;
michael@0 1347
michael@0 1348 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
michael@0 1349 (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len);
michael@0 1350
michael@0 1351 if( !result ) {
michael@0 1352 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n",
michael@0 1353 e->c);
michael@0 1354 rv = PR_FALSE;
michael@0 1355 continue;
michael@0 1356 }
michael@0 1357
michael@0 1358 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
michael@0 1359 utf8, len, (unsigned char *)&back[0], sizeof(back), &len);
michael@0 1360
michael@0 1361 if( 4 != len ) {
michael@0 1362 fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: "
michael@0 1363 "unexpected len %d\n", e->c, len);
michael@0 1364 rv = PR_FALSE;
michael@0 1365 continue;
michael@0 1366 }
michael@0 1367
michael@0 1368 utf8[len] = '\0'; /* null-terminate for printing */
michael@0 1369
michael@0 1370 if( !result ) {
michael@0 1371 dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n");
michael@0 1372 rv = PR_FALSE;
michael@0 1373 continue;
michael@0 1374 }
michael@0 1375
michael@0 1376 if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) {
michael@0 1377 fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c);
michael@0 1378 dump_utf8("", utf8, "and then to UTF-16:");
michael@0 1379 if( sizeof(back) != len ) {
michael@0 1380 fprintf(stdout, "len is %d\n", len);
michael@0 1381 } else {
michael@0 1382 fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n",
michael@0 1383 e->w[0], e->w[1], back[0], back[1]);
michael@0 1384 }
michael@0 1385 rv = PR_FALSE;
michael@0 1386 continue;
michael@0 1387 }
michael@0 1388 }
michael@0 1389
michael@0 1390 return rv;
michael@0 1391 }
michael@0 1392
michael@0 1393 static PRBool
michael@0 1394 test_utf8_bad_chars
michael@0 1395 (
michael@0 1396 void
michael@0 1397 )
michael@0 1398 {
michael@0 1399 PRBool rv = PR_TRUE;
michael@0 1400 int i;
michael@0 1401
michael@0 1402 for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) {
michael@0 1403 PRBool result;
michael@0 1404 unsigned char destbuf[30];
michael@0 1405 unsigned int len = 0;
michael@0 1406
michael@0 1407 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
michael@0 1408 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
michael@0 1409
michael@0 1410 if( result ) {
michael@0 1411 dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n");
michael@0 1412 rv = PR_FALSE;
michael@0 1413 continue;
michael@0 1414 }
michael@0 1415 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
michael@0 1416 (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len);
michael@0 1417
michael@0 1418 if( result ) {
michael@0 1419 dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n");
michael@0 1420 rv = PR_FALSE;
michael@0 1421 continue;
michael@0 1422 }
michael@0 1423
michael@0 1424 }
michael@0 1425
michael@0 1426 return rv;
michael@0 1427 }
michael@0 1428
michael@0 1429 static PRBool
michael@0 1430 test_iso88591_chars
michael@0 1431 (
michael@0 1432 void
michael@0 1433 )
michael@0 1434 {
michael@0 1435 PRBool rv = PR_TRUE;
michael@0 1436 int i;
michael@0 1437
michael@0 1438 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
michael@0 1439 struct ucs2 *e = &ucs2[i];
michael@0 1440 PRBool result;
michael@0 1441 unsigned char iso88591;
michael@0 1442 unsigned char utf8[3];
michael@0 1443 unsigned int len = 0;
michael@0 1444
michael@0 1445 if (ntohs(e->c) > 0xFF) continue;
michael@0 1446
michael@0 1447 (void)memset(utf8, 0, sizeof(utf8));
michael@0 1448 iso88591 = ntohs(e->c);
michael@0 1449
michael@0 1450 result = sec_port_iso88591_utf8_conversion_function(&iso88591,
michael@0 1451 1, utf8, sizeof(utf8), &len);
michael@0 1452
michael@0 1453 if( !result ) {
michael@0 1454 fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591);
michael@0 1455 rv = PR_FALSE;
michael@0 1456 continue;
michael@0 1457 }
michael@0 1458
michael@0 1459 if( (len >= sizeof(utf8)) ||
michael@0 1460 (strlen(e->utf8) != len) ||
michael@0 1461 (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) {
michael@0 1462 fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591);
michael@0 1463 dump_utf8("expected", e->utf8, ", ");
michael@0 1464 dump_utf8("received", utf8, "\n");
michael@0 1465 rv = PR_FALSE;
michael@0 1466 continue;
michael@0 1467 }
michael@0 1468
michael@0 1469 }
michael@0 1470
michael@0 1471 return rv;
michael@0 1472 }
michael@0 1473
michael@0 1474 static PRBool
michael@0 1475 test_zeroes
michael@0 1476 (
michael@0 1477 void
michael@0 1478 )
michael@0 1479 {
michael@0 1480 PRBool rv = PR_TRUE;
michael@0 1481 PRBool result;
michael@0 1482 PRUint32 lzero = 0;
michael@0 1483 PRUint16 szero = 0;
michael@0 1484 unsigned char utf8[8];
michael@0 1485 unsigned int len = 0;
michael@0 1486 PRUint32 lback = 1;
michael@0 1487 PRUint16 sback = 1;
michael@0 1488
michael@0 1489 (void)memset(utf8, 1, sizeof(utf8));
michael@0 1490
michael@0 1491 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
michael@0 1492 (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len);
michael@0 1493
michael@0 1494 if( !result ) {
michael@0 1495 fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n");
michael@0 1496 rv = PR_FALSE;
michael@0 1497 } else if( 1 != len ) {
michael@0 1498 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len);
michael@0 1499 rv = PR_FALSE;
michael@0 1500 } else if( '\0' != *utf8 ) {
michael@0 1501 fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ,"
michael@0 1502 "received %02.2x\n", (unsigned int)*utf8);
michael@0 1503 rv = PR_FALSE;
michael@0 1504 }
michael@0 1505
michael@0 1506 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
michael@0 1507 "", 1, (unsigned char *)&lback, sizeof(lback), &len);
michael@0 1508
michael@0 1509 if( !result ) {
michael@0 1510 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n");
michael@0 1511 rv = PR_FALSE;
michael@0 1512 } else if( 4 != len ) {
michael@0 1513 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len);
michael@0 1514 rv = PR_FALSE;
michael@0 1515 } else if( 0 != lback ) {
michael@0 1516 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: "
michael@0 1517 "expected 0x00000000, received 0x%08.8x\n", lback);
michael@0 1518 rv = PR_FALSE;
michael@0 1519 }
michael@0 1520
michael@0 1521 (void)memset(utf8, 1, sizeof(utf8));
michael@0 1522
michael@0 1523 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
michael@0 1524 (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len);
michael@0 1525
michael@0 1526 if( !result ) {
michael@0 1527 fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n");
michael@0 1528 rv = PR_FALSE;
michael@0 1529 } else if( 1 != len ) {
michael@0 1530 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len);
michael@0 1531 rv = PR_FALSE;
michael@0 1532 } else if( '\0' != *utf8 ) {
michael@0 1533 fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ,"
michael@0 1534 "received %02.2x\n", (unsigned int)*utf8);
michael@0 1535 rv = PR_FALSE;
michael@0 1536 }
michael@0 1537
michael@0 1538 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
michael@0 1539 "", 1, (unsigned char *)&sback, sizeof(sback), &len);
michael@0 1540
michael@0 1541 if( !result ) {
michael@0 1542 fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n");
michael@0 1543 rv = PR_FALSE;
michael@0 1544 } else if( 2 != len ) {
michael@0 1545 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len);
michael@0 1546 rv = PR_FALSE;
michael@0 1547 } else if( 0 != sback ) {
michael@0 1548 fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: "
michael@0 1549 "expected 0x0000, received 0x%04.4x\n", sback);
michael@0 1550 rv = PR_FALSE;
michael@0 1551 }
michael@0 1552
michael@0 1553 return rv;
michael@0 1554 }
michael@0 1555
michael@0 1556 static PRBool
michael@0 1557 test_multichars
michael@0 1558 (
michael@0 1559 void
michael@0 1560 )
michael@0 1561 {
michael@0 1562 int i;
michael@0 1563 unsigned int len, lenout;
michael@0 1564 PRUint32 *ucs4s;
michael@0 1565 char *ucs4_utf8;
michael@0 1566 PRUint16 *ucs2s;
michael@0 1567 char *ucs2_utf8;
michael@0 1568 void *tmp;
michael@0 1569 PRBool result;
michael@0 1570
michael@0 1571 ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32));
michael@0 1572 ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16));
michael@0 1573
michael@0 1574 if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) {
michael@0 1575 fprintf(stderr, "out of memory\n");
michael@0 1576 exit(1);
michael@0 1577 }
michael@0 1578
michael@0 1579 len = 0;
michael@0 1580 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
michael@0 1581 ucs4s[i] = ucs4[i].c;
michael@0 1582 len += strlen(ucs4[i].utf8);
michael@0 1583 }
michael@0 1584
michael@0 1585 ucs4_utf8 = (char *)malloc(len);
michael@0 1586
michael@0 1587 len = 0;
michael@0 1588 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
michael@0 1589 ucs2s[i] = ucs2[i].c;
michael@0 1590 len += strlen(ucs2[i].utf8);
michael@0 1591 }
michael@0 1592
michael@0 1593 ucs2_utf8 = (char *)malloc(len);
michael@0 1594
michael@0 1595 if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) {
michael@0 1596 fprintf(stderr, "out of memory\n");
michael@0 1597 exit(1);
michael@0 1598 }
michael@0 1599
michael@0 1600 *ucs4_utf8 = '\0';
michael@0 1601 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
michael@0 1602 strcat(ucs4_utf8, ucs4[i].utf8);
michael@0 1603 }
michael@0 1604
michael@0 1605 *ucs2_utf8 = '\0';
michael@0 1606 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
michael@0 1607 strcat(ucs2_utf8, ucs2[i].utf8);
michael@0 1608 }
michael@0 1609
michael@0 1610 /* UTF-8 -> UCS-4 */
michael@0 1611 len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32);
michael@0 1612 tmp = calloc(len, 1);
michael@0 1613 if( (void *)NULL == tmp ) {
michael@0 1614 fprintf(stderr, "out of memory\n");
michael@0 1615 exit(1);
michael@0 1616 }
michael@0 1617
michael@0 1618 result = sec_port_ucs4_utf8_conversion_function(PR_TRUE,
michael@0 1619 ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout);
michael@0 1620 if( !result ) {
michael@0 1621 fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n");
michael@0 1622 goto done;
michael@0 1623 }
michael@0 1624
michael@0 1625 if( lenout != len ) {
michael@0 1626 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n");
michael@0 1627 goto loser;
michael@0 1628 }
michael@0 1629
michael@0 1630 if( 0 != memcmp(ucs4s, tmp, len) ) {
michael@0 1631 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n");
michael@0 1632 goto loser;
michael@0 1633 }
michael@0 1634
michael@0 1635 free(tmp); tmp = (void *)NULL;
michael@0 1636
michael@0 1637 /* UCS-4 -> UTF-8 */
michael@0 1638 len = strlen(ucs4_utf8);
michael@0 1639 tmp = calloc(len, 1);
michael@0 1640 if( (void *)NULL == tmp ) {
michael@0 1641 fprintf(stderr, "out of memory\n");
michael@0 1642 exit(1);
michael@0 1643 }
michael@0 1644
michael@0 1645 result = sec_port_ucs4_utf8_conversion_function(PR_FALSE,
michael@0 1646 (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32),
michael@0 1647 tmp, len, &lenout);
michael@0 1648 if( !result ) {
michael@0 1649 fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n");
michael@0 1650 goto done;
michael@0 1651 }
michael@0 1652
michael@0 1653 if( lenout != len ) {
michael@0 1654 fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n");
michael@0 1655 goto loser;
michael@0 1656 }
michael@0 1657
michael@0 1658 if( 0 != strncmp(ucs4_utf8, tmp, len) ) {
michael@0 1659 fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n");
michael@0 1660 goto loser;
michael@0 1661 }
michael@0 1662
michael@0 1663 free(tmp); tmp = (void *)NULL;
michael@0 1664
michael@0 1665 /* UTF-8 -> UCS-2 */
michael@0 1666 len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16);
michael@0 1667 tmp = calloc(len, 1);
michael@0 1668 if( (void *)NULL == tmp ) {
michael@0 1669 fprintf(stderr, "out of memory\n");
michael@0 1670 exit(1);
michael@0 1671 }
michael@0 1672
michael@0 1673 result = sec_port_ucs2_utf8_conversion_function(PR_TRUE,
michael@0 1674 ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout);
michael@0 1675 if( !result ) {
michael@0 1676 fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n");
michael@0 1677 goto done;
michael@0 1678 }
michael@0 1679
michael@0 1680 if( lenout != len ) {
michael@0 1681 fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n");
michael@0 1682 goto loser;
michael@0 1683 }
michael@0 1684
michael@0 1685 if( 0 != memcmp(ucs2s, tmp, len) ) {
michael@0 1686 fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n");
michael@0 1687 goto loser;
michael@0 1688 }
michael@0 1689
michael@0 1690 free(tmp); tmp = (void *)NULL;
michael@0 1691
michael@0 1692 /* UCS-2 -> UTF-8 */
michael@0 1693 len = strlen(ucs2_utf8);
michael@0 1694 tmp = calloc(len, 1);
michael@0 1695 if( (void *)NULL == tmp ) {
michael@0 1696 fprintf(stderr, "out of memory\n");
michael@0 1697 exit(1);
michael@0 1698 }
michael@0 1699
michael@0 1700 result = sec_port_ucs2_utf8_conversion_function(PR_FALSE,
michael@0 1701 (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16),
michael@0 1702 tmp, len, &lenout);
michael@0 1703 if( !result ) {
michael@0 1704 fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n");
michael@0 1705 goto done;
michael@0 1706 }
michael@0 1707
michael@0 1708 if( lenout != len ) {
michael@0 1709 fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n");
michael@0 1710 goto loser;
michael@0 1711 }
michael@0 1712
michael@0 1713 if( 0 != strncmp(ucs2_utf8, tmp, len) ) {
michael@0 1714 fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n");
michael@0 1715 goto loser;
michael@0 1716 }
michael@0 1717
michael@0 1718 /* implement UTF16 */
michael@0 1719
michael@0 1720 result = PR_TRUE;
michael@0 1721 goto done;
michael@0 1722
michael@0 1723 loser:
michael@0 1724 result = PR_FALSE;
michael@0 1725 done:
michael@0 1726 free(ucs4s);
michael@0 1727 free(ucs4_utf8);
michael@0 1728 free(ucs2s);
michael@0 1729 free(ucs2_utf8);
michael@0 1730 if( (void *)NULL != tmp ) free(tmp);
michael@0 1731 return result;
michael@0 1732 }
michael@0 1733
michael@0 1734 void
michael@0 1735 byte_order
michael@0 1736 (
michael@0 1737 void
michael@0 1738 )
michael@0 1739 {
michael@0 1740 /*
michael@0 1741 * The implementation (now) expects the 16- and 32-bit characters
michael@0 1742 * to be in network byte order, not host byte order. Therefore I
michael@0 1743 * have to byteswap all those test vectors above. hton[ls] may be
michael@0 1744 * functions, so I have to do this dynamically. If you want to
michael@0 1745 * use this code to do host byte order conversions, just remove
michael@0 1746 * the call in main() to this function.
michael@0 1747 */
michael@0 1748
michael@0 1749 int i;
michael@0 1750
michael@0 1751 for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) {
michael@0 1752 struct ucs4 *e = &ucs4[i];
michael@0 1753 e->c = htonl(e->c);
michael@0 1754 }
michael@0 1755
michael@0 1756 for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) {
michael@0 1757 struct ucs2 *e = &ucs2[i];
michael@0 1758 e->c = htons(e->c);
michael@0 1759 }
michael@0 1760
michael@0 1761 for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) {
michael@0 1762 struct utf16 *e = &utf16[i];
michael@0 1763 e->c = htonl(e->c);
michael@0 1764 e->w[0] = htons(e->w[0]);
michael@0 1765 e->w[1] = htons(e->w[1]);
michael@0 1766 }
michael@0 1767
michael@0 1768 return;
michael@0 1769 }
michael@0 1770
michael@0 1771 int
michael@0 1772 main
michael@0 1773 (
michael@0 1774 int argc,
michael@0 1775 char *argv[]
michael@0 1776 )
michael@0 1777 {
michael@0 1778 byte_order();
michael@0 1779
michael@0 1780 if( test_ucs4_chars() &&
michael@0 1781 test_ucs2_chars() &&
michael@0 1782 test_utf16_chars() &&
michael@0 1783 test_utf8_bad_chars() &&
michael@0 1784 test_iso88591_chars() &&
michael@0 1785 test_zeroes() &&
michael@0 1786 test_multichars() &&
michael@0 1787 PR_TRUE ) {
michael@0 1788 fprintf(stderr, "PASS\n");
michael@0 1789 return 1;
michael@0 1790 } else {
michael@0 1791 fprintf(stderr, "FAIL\n");
michael@0 1792 return 0;
michael@0 1793 }
michael@0 1794 }
michael@0 1795
michael@0 1796 #endif /* TEST_UTF8 */

mercurial