1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/nss/lib/util/utf8.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1796 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#include "seccomon.h" 1.9 +#include "secport.h" 1.10 + 1.11 +#ifdef TEST_UTF8 1.12 +#include <assert.h> 1.13 +#undef PORT_Assert 1.14 +#define PORT_Assert assert 1.15 +#endif 1.16 + 1.17 +/* 1.18 + * From RFC 2044: 1.19 + * 1.20 + * UCS-4 range (hex.) UTF-8 octet sequence (binary) 1.21 + * 0000 0000-0000 007F 0xxxxxxx 1.22 + * 0000 0080-0000 07FF 110xxxxx 10xxxxxx 1.23 + * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx 1.24 + * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 1.25 + * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 1.26 + * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx 1.27 + */ 1.28 + 1.29 +/* 1.30 + * From http://www.imc.org/draft-hoffman-utf16 1.31 + * 1.32 + * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 1.33 + * 1.34 + * U' = yyyyyyyyyyxxxxxxxxxx 1.35 + * W1 = 110110yyyyyyyyyy 1.36 + * W2 = 110111xxxxxxxxxx 1.37 + */ 1.38 + 1.39 +/* 1.40 + * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit 1.41 + * character values. If you wish to use this code for working with 1.42 + * host byte order values, define the following: 1.43 + * 1.44 + * #if IS_BIG_ENDIAN 1.45 + * #define L_0 0 1.46 + * #define L_1 1 1.47 + * #define L_2 2 1.48 + * #define L_3 3 1.49 + * #define H_0 0 1.50 + * #define H_1 1 1.51 + * #else / * not everyone has elif * / 1.52 + * #if IS_LITTLE_ENDIAN 1.53 + * #define L_0 3 1.54 + * #define L_1 2 1.55 + * #define L_2 1 1.56 + * #define L_3 0 1.57 + * #define H_0 1 1.58 + * #define H_1 0 1.59 + * #else 1.60 + * #error "PDP and NUXI support deferred" 1.61 + * #endif / * IS_LITTLE_ENDIAN * / 1.62 + * #endif / * IS_BIG_ENDIAN * / 1.63 + */ 1.64 + 1.65 +#define L_0 0 1.66 +#define L_1 1 1.67 +#define L_2 2 1.68 +#define L_3 3 1.69 +#define H_0 0 1.70 +#define H_1 1 1.71 + 1.72 +#define BAD_UTF8 ((PRUint32)-1) 1.73 + 1.74 +/* 1.75 + * Parse a single UTF-8 character per the spec. in section 3.9 (D36) 1.76 + * of Unicode 4.0.0. 1.77 + * 1.78 + * Parameters: 1.79 + * index - Points to the byte offset in inBuf of character to read. On success, 1.80 + * updated to the offset of the following character. 1.81 + * inBuf - Input buffer, UTF-8 encoded 1.82 + * inbufLen - Length of input buffer, in bytes. 1.83 + * 1.84 + * Returns: 1.85 + * Success - The UCS4 encoded character 1.86 + * Failure - BAD_UTF8 1.87 + */ 1.88 +static PRUint32 1.89 +sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen) 1.90 +{ 1.91 + PRUint32 result; 1.92 + unsigned int i = *index; 1.93 + int bytes_left; 1.94 + PRUint32 min_value; 1.95 + 1.96 + PORT_Assert(i < inBufLen); 1.97 + 1.98 + if ( (inBuf[i] & 0x80) == 0x00 ) { 1.99 + result = inBuf[i++]; 1.100 + bytes_left = 0; 1.101 + min_value = 0; 1.102 + } else if ( (inBuf[i] & 0xE0) == 0xC0 ) { 1.103 + result = inBuf[i++] & 0x1F; 1.104 + bytes_left = 1; 1.105 + min_value = 0x80; 1.106 + } else if ( (inBuf[i] & 0xF0) == 0xE0) { 1.107 + result = inBuf[i++] & 0x0F; 1.108 + bytes_left = 2; 1.109 + min_value = 0x800; 1.110 + } else if ( (inBuf[i] & 0xF8) == 0xF0) { 1.111 + result = inBuf[i++] & 0x07; 1.112 + bytes_left = 3; 1.113 + min_value = 0x10000; 1.114 + } else { 1.115 + return BAD_UTF8; 1.116 + } 1.117 + 1.118 + while (bytes_left--) { 1.119 + if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) return BAD_UTF8; 1.120 + result = (result << 6) | (inBuf[i++] & 0x3F); 1.121 + } 1.122 + 1.123 + /* Check for overlong sequences, surrogates, and outside unicode range */ 1.124 + if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) { 1.125 + return BAD_UTF8; 1.126 + } 1.127 + 1.128 + *index = i; 1.129 + return result; 1.130 +} 1.131 + 1.132 +PRBool 1.133 +sec_port_ucs4_utf8_conversion_function 1.134 +( 1.135 + PRBool toUnicode, 1.136 + unsigned char *inBuf, 1.137 + unsigned int inBufLen, 1.138 + unsigned char *outBuf, 1.139 + unsigned int maxOutBufLen, 1.140 + unsigned int *outBufLen 1.141 +) 1.142 +{ 1.143 + PORT_Assert((unsigned int *)NULL != outBufLen); 1.144 + 1.145 + if( toUnicode ) { 1.146 + unsigned int i, len = 0; 1.147 + 1.148 + for( i = 0; i < inBufLen; ) { 1.149 + if( (inBuf[i] & 0x80) == 0x00 ) i += 1; 1.150 + else if( (inBuf[i] & 0xE0) == 0xC0 ) i += 2; 1.151 + else if( (inBuf[i] & 0xF0) == 0xE0 ) i += 3; 1.152 + else if( (inBuf[i] & 0xF8) == 0xF0 ) i += 4; 1.153 + else return PR_FALSE; 1.154 + 1.155 + len += 4; 1.156 + } 1.157 + 1.158 + if( len > maxOutBufLen ) { 1.159 + *outBufLen = len; 1.160 + return PR_FALSE; 1.161 + } 1.162 + 1.163 + len = 0; 1.164 + 1.165 + for( i = 0; i < inBufLen; ) { 1.166 + PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); 1.167 + 1.168 + if (ucs4 == BAD_UTF8) return PR_FALSE; 1.169 + 1.170 + outBuf[len+L_0] = 0x00; 1.171 + outBuf[len+L_1] = (unsigned char)(ucs4 >> 16); 1.172 + outBuf[len+L_2] = (unsigned char)(ucs4 >> 8); 1.173 + outBuf[len+L_3] = (unsigned char)ucs4; 1.174 + 1.175 + len += 4; 1.176 + } 1.177 + 1.178 + *outBufLen = len; 1.179 + return PR_TRUE; 1.180 + } else { 1.181 + unsigned int i, len = 0; 1.182 + PORT_Assert((inBufLen % 4) == 0); 1.183 + if ((inBufLen % 4) != 0) { 1.184 + *outBufLen = 0; 1.185 + return PR_FALSE; 1.186 + } 1.187 + 1.188 + for( i = 0; i < inBufLen; i += 4 ) { 1.189 + if( (inBuf[i+L_0] > 0x00) || (inBuf[i+L_1] > 0x10) ) { 1.190 + *outBufLen = 0; 1.191 + return PR_FALSE; 1.192 + } else if( inBuf[i+L_1] >= 0x01 ) len += 4; 1.193 + else if( inBuf[i+L_2] >= 0x08 ) len += 3; 1.194 + else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) len += 2; 1.195 + else len += 1; 1.196 + } 1.197 + 1.198 + if( len > maxOutBufLen ) { 1.199 + *outBufLen = len; 1.200 + return PR_FALSE; 1.201 + } 1.202 + 1.203 + len = 0; 1.204 + 1.205 + for( i = 0; i < inBufLen; i += 4 ) { 1.206 + if( inBuf[i+L_1] >= 0x01 ) { 1.207 + /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 1.208 + /* 00000000 000abcde fghijklm nopqrstu -> 1.209 + 11110abc 10defghi 10jklmno 10pqrstu */ 1.210 + 1.211 + outBuf[len+0] = 0xF0 | ((inBuf[i+L_1] & 0x1C) >> 2); 1.212 + outBuf[len+1] = 0x80 | ((inBuf[i+L_1] & 0x03) << 4) 1.213 + | ((inBuf[i+L_2] & 0xF0) >> 4); 1.214 + outBuf[len+2] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) 1.215 + | ((inBuf[i+L_3] & 0xC0) >> 6); 1.216 + outBuf[len+3] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); 1.217 + 1.218 + len += 4; 1.219 + } else if( inBuf[i+L_2] >= 0x08 ) { 1.220 + /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ 1.221 + /* 00000000 00000000 abcdefgh ijklmnop -> 1.222 + 1110abcd 10efghij 10klmnop */ 1.223 + 1.224 + outBuf[len+0] = 0xE0 | ((inBuf[i+L_2] & 0xF0) >> 4); 1.225 + outBuf[len+1] = 0x80 | ((inBuf[i+L_2] & 0x0F) << 2) 1.226 + | ((inBuf[i+L_3] & 0xC0) >> 6); 1.227 + outBuf[len+2] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); 1.228 + 1.229 + len += 3; 1.230 + } else if( (inBuf[i+L_2] > 0x00) || (inBuf[i+L_3] >= 0x80) ) { 1.231 + /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ 1.232 + /* 00000000 00000000 00000abc defghijk -> 1.233 + 110abcde 10fghijk */ 1.234 + 1.235 + outBuf[len+0] = 0xC0 | ((inBuf[i+L_2] & 0x07) << 2) 1.236 + | ((inBuf[i+L_3] & 0xC0) >> 6); 1.237 + outBuf[len+1] = 0x80 | ((inBuf[i+L_3] & 0x3F) >> 0); 1.238 + 1.239 + len += 2; 1.240 + } else { 1.241 + /* 0000 0000-0000 007F -> 0xxxxxx */ 1.242 + /* 00000000 00000000 00000000 0abcdefg -> 1.243 + 0abcdefg */ 1.244 + 1.245 + outBuf[len+0] = (inBuf[i+L_3] & 0x7F); 1.246 + 1.247 + len += 1; 1.248 + } 1.249 + } 1.250 + 1.251 + *outBufLen = len; 1.252 + return PR_TRUE; 1.253 + } 1.254 +} 1.255 + 1.256 +PRBool 1.257 +sec_port_ucs2_utf8_conversion_function 1.258 +( 1.259 + PRBool toUnicode, 1.260 + unsigned char *inBuf, 1.261 + unsigned int inBufLen, 1.262 + unsigned char *outBuf, 1.263 + unsigned int maxOutBufLen, 1.264 + unsigned int *outBufLen 1.265 +) 1.266 +{ 1.267 + PORT_Assert((unsigned int *)NULL != outBufLen); 1.268 + 1.269 + if( toUnicode ) { 1.270 + unsigned int i, len = 0; 1.271 + 1.272 + for( i = 0; i < inBufLen; ) { 1.273 + if( (inBuf[i] & 0x80) == 0x00 ) { 1.274 + i += 1; 1.275 + len += 2; 1.276 + } else if( (inBuf[i] & 0xE0) == 0xC0 ) { 1.277 + i += 2; 1.278 + len += 2; 1.279 + } else if( (inBuf[i] & 0xF0) == 0xE0 ) { 1.280 + i += 3; 1.281 + len += 2; 1.282 + } else if( (inBuf[i] & 0xF8) == 0xF0 ) { 1.283 + i += 4; 1.284 + len += 4; 1.285 + } else return PR_FALSE; 1.286 + } 1.287 + 1.288 + if( len > maxOutBufLen ) { 1.289 + *outBufLen = len; 1.290 + return PR_FALSE; 1.291 + } 1.292 + 1.293 + len = 0; 1.294 + 1.295 + for( i = 0; i < inBufLen; ) { 1.296 + PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); 1.297 + 1.298 + if (ucs4 == BAD_UTF8) return PR_FALSE; 1.299 + 1.300 + if( ucs4 < 0x10000) { 1.301 + outBuf[len+H_0] = (unsigned char)(ucs4 >> 8); 1.302 + outBuf[len+H_1] = (unsigned char)ucs4; 1.303 + len += 2; 1.304 + } else { 1.305 + ucs4 -= 0x10000; 1.306 + outBuf[len+0+H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); 1.307 + outBuf[len+0+H_1] = (unsigned char)(ucs4 >> 10); 1.308 + outBuf[len+2+H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); 1.309 + outBuf[len+2+H_1] = (unsigned char)ucs4; 1.310 + len += 4; 1.311 + } 1.312 + } 1.313 + 1.314 + *outBufLen = len; 1.315 + return PR_TRUE; 1.316 + } else { 1.317 + unsigned int i, len = 0; 1.318 + PORT_Assert((inBufLen % 2) == 0); 1.319 + if ((inBufLen % 2) != 0) { 1.320 + *outBufLen = 0; 1.321 + return PR_FALSE; 1.322 + } 1.323 + 1.324 + for( i = 0; i < inBufLen; i += 2 ) { 1.325 + if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_0] & 0x80) == 0x00) ) len += 1; 1.326 + else if( inBuf[i+H_0] < 0x08 ) len += 2; 1.327 + else if( ((inBuf[i+0+H_0] & 0xDC) == 0xD8) ) { 1.328 + if( ((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2) ) { 1.329 + i += 2; 1.330 + len += 4; 1.331 + } else { 1.332 + return PR_FALSE; 1.333 + } 1.334 + } 1.335 + else len += 3; 1.336 + } 1.337 + 1.338 + if( len > maxOutBufLen ) { 1.339 + *outBufLen = len; 1.340 + return PR_FALSE; 1.341 + } 1.342 + 1.343 + len = 0; 1.344 + 1.345 + for( i = 0; i < inBufLen; i += 2 ) { 1.346 + if( (inBuf[i+H_0] == 0x00) && ((inBuf[i+H_1] & 0x80) == 0x00) ) { 1.347 + /* 0000-007F -> 0xxxxxx */ 1.348 + /* 00000000 0abcdefg -> 0abcdefg */ 1.349 + 1.350 + outBuf[len] = inBuf[i+H_1] & 0x7F; 1.351 + 1.352 + len += 1; 1.353 + } else if( inBuf[i+H_0] < 0x08 ) { 1.354 + /* 0080-07FF -> 110xxxxx 10xxxxxx */ 1.355 + /* 00000abc defghijk -> 110abcde 10fghijk */ 1.356 + 1.357 + outBuf[len+0] = 0xC0 | ((inBuf[i+H_0] & 0x07) << 2) 1.358 + | ((inBuf[i+H_1] & 0xC0) >> 6); 1.359 + outBuf[len+1] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); 1.360 + 1.361 + len += 2; 1.362 + } else if( (inBuf[i+H_0] & 0xDC) == 0xD8 ) { 1.363 + int abcde, BCDE; 1.364 + 1.365 + PORT_Assert(((inBuf[i+2+H_0] & 0xDC) == 0xDC) && ((inBufLen - i) > 2)); 1.366 + 1.367 + /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 1.368 + /* 110110BC DEfghijk 110111lm nopqrstu -> 1.369 + { Let abcde = BCDE + 1 } 1.370 + 11110abc 10defghi 10jklmno 10pqrstu */ 1.371 + 1.372 + BCDE = ((inBuf[i+H_0] & 0x03) << 2) | ((inBuf[i+H_1] & 0xC0) >> 6); 1.373 + abcde = BCDE + 1; 1.374 + 1.375 + outBuf[len+0] = 0xF0 | ((abcde & 0x1C) >> 2); 1.376 + outBuf[len+1] = 0x80 | ((abcde & 0x03) << 4) 1.377 + | ((inBuf[i+0+H_1] & 0x3C) >> 2); 1.378 + outBuf[len+2] = 0x80 | ((inBuf[i+0+H_1] & 0x03) << 4) 1.379 + | ((inBuf[i+2+H_0] & 0x03) << 2) 1.380 + | ((inBuf[i+2+H_1] & 0xC0) >> 6); 1.381 + outBuf[len+3] = 0x80 | ((inBuf[i+2+H_1] & 0x3F) >> 0); 1.382 + 1.383 + i += 2; 1.384 + len += 4; 1.385 + } else { 1.386 + /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ 1.387 + /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ 1.388 + 1.389 + outBuf[len+0] = 0xE0 | ((inBuf[i+H_0] & 0xF0) >> 4); 1.390 + outBuf[len+1] = 0x80 | ((inBuf[i+H_0] & 0x0F) << 2) 1.391 + | ((inBuf[i+H_1] & 0xC0) >> 6); 1.392 + outBuf[len+2] = 0x80 | ((inBuf[i+H_1] & 0x3F) >> 0); 1.393 + 1.394 + len += 3; 1.395 + } 1.396 + } 1.397 + 1.398 + *outBufLen = len; 1.399 + return PR_TRUE; 1.400 + } 1.401 +} 1.402 + 1.403 +PRBool 1.404 +sec_port_iso88591_utf8_conversion_function 1.405 +( 1.406 + const unsigned char *inBuf, 1.407 + unsigned int inBufLen, 1.408 + unsigned char *outBuf, 1.409 + unsigned int maxOutBufLen, 1.410 + unsigned int *outBufLen 1.411 +) 1.412 +{ 1.413 + unsigned int i, len = 0; 1.414 + 1.415 + PORT_Assert((unsigned int *)NULL != outBufLen); 1.416 + 1.417 + for( i = 0; i < inBufLen; i++) { 1.418 + if( (inBuf[i] & 0x80) == 0x00 ) len += 1; 1.419 + else len += 2; 1.420 + } 1.421 + 1.422 + if( len > maxOutBufLen ) { 1.423 + *outBufLen = len; 1.424 + return PR_FALSE; 1.425 + } 1.426 + 1.427 + len = 0; 1.428 + 1.429 + for( i = 0; i < inBufLen; i++) { 1.430 + if( (inBuf[i] & 0x80) == 0x00 ) { 1.431 + /* 00-7F -> 0xxxxxxx */ 1.432 + /* 0abcdefg -> 0abcdefg */ 1.433 + 1.434 + outBuf[len] = inBuf[i]; 1.435 + len += 1; 1.436 + } else { 1.437 + /* 80-FF <- 110xxxxx 10xxxxxx */ 1.438 + /* 00000000 abcdefgh -> 110000ab 10cdefgh */ 1.439 + 1.440 + outBuf[len+0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); 1.441 + outBuf[len+1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); 1.442 + 1.443 + len += 2; 1.444 + } 1.445 + } 1.446 + 1.447 + *outBufLen = len; 1.448 + return PR_TRUE; 1.449 +} 1.450 + 1.451 +#ifdef TEST_UTF8 1.452 + 1.453 +#include <stdio.h> 1.454 +#include <string.h> 1.455 +#include <stdlib.h> 1.456 +#include <netinet/in.h> /* for htonl and htons */ 1.457 + 1.458 +/* 1.459 + * UCS-4 vectors 1.460 + */ 1.461 + 1.462 +struct ucs4 { 1.463 + PRUint32 c; 1.464 + char *utf8; 1.465 +}; 1.466 + 1.467 +/* 1.468 + * UCS-2 vectors 1.469 + */ 1.470 + 1.471 +struct ucs2 { 1.472 + PRUint16 c; 1.473 + char *utf8; 1.474 +}; 1.475 + 1.476 +/* 1.477 + * UTF-16 vectors 1.478 + */ 1.479 + 1.480 +struct utf16 { 1.481 + PRUint32 c; 1.482 + PRUint16 w[2]; 1.483 +}; 1.484 + 1.485 + 1.486 +/* 1.487 + * UCS-4 vectors 1.488 + */ 1.489 + 1.490 +struct ucs4 ucs4[] = { 1.491 + { 0x00000001, "\x01" }, 1.492 + { 0x00000002, "\x02" }, 1.493 + { 0x00000003, "\x03" }, 1.494 + { 0x00000004, "\x04" }, 1.495 + { 0x00000007, "\x07" }, 1.496 + { 0x00000008, "\x08" }, 1.497 + { 0x0000000F, "\x0F" }, 1.498 + { 0x00000010, "\x10" }, 1.499 + { 0x0000001F, "\x1F" }, 1.500 + { 0x00000020, "\x20" }, 1.501 + { 0x0000003F, "\x3F" }, 1.502 + { 0x00000040, "\x40" }, 1.503 + { 0x0000007F, "\x7F" }, 1.504 + 1.505 + { 0x00000080, "\xC2\x80" }, 1.506 + { 0x00000081, "\xC2\x81" }, 1.507 + { 0x00000082, "\xC2\x82" }, 1.508 + { 0x00000084, "\xC2\x84" }, 1.509 + { 0x00000088, "\xC2\x88" }, 1.510 + { 0x00000090, "\xC2\x90" }, 1.511 + { 0x000000A0, "\xC2\xA0" }, 1.512 + { 0x000000C0, "\xC3\x80" }, 1.513 + { 0x000000FF, "\xC3\xBF" }, 1.514 + { 0x00000100, "\xC4\x80" }, 1.515 + { 0x00000101, "\xC4\x81" }, 1.516 + { 0x00000102, "\xC4\x82" }, 1.517 + { 0x00000104, "\xC4\x84" }, 1.518 + { 0x00000108, "\xC4\x88" }, 1.519 + { 0x00000110, "\xC4\x90" }, 1.520 + { 0x00000120, "\xC4\xA0" }, 1.521 + { 0x00000140, "\xC5\x80" }, 1.522 + { 0x00000180, "\xC6\x80" }, 1.523 + { 0x000001FF, "\xC7\xBF" }, 1.524 + { 0x00000200, "\xC8\x80" }, 1.525 + { 0x00000201, "\xC8\x81" }, 1.526 + { 0x00000202, "\xC8\x82" }, 1.527 + { 0x00000204, "\xC8\x84" }, 1.528 + { 0x00000208, "\xC8\x88" }, 1.529 + { 0x00000210, "\xC8\x90" }, 1.530 + { 0x00000220, "\xC8\xA0" }, 1.531 + { 0x00000240, "\xC9\x80" }, 1.532 + { 0x00000280, "\xCA\x80" }, 1.533 + { 0x00000300, "\xCC\x80" }, 1.534 + { 0x000003FF, "\xCF\xBF" }, 1.535 + { 0x00000400, "\xD0\x80" }, 1.536 + { 0x00000401, "\xD0\x81" }, 1.537 + { 0x00000402, "\xD0\x82" }, 1.538 + { 0x00000404, "\xD0\x84" }, 1.539 + { 0x00000408, "\xD0\x88" }, 1.540 + { 0x00000410, "\xD0\x90" }, 1.541 + { 0x00000420, "\xD0\xA0" }, 1.542 + { 0x00000440, "\xD1\x80" }, 1.543 + { 0x00000480, "\xD2\x80" }, 1.544 + { 0x00000500, "\xD4\x80" }, 1.545 + { 0x00000600, "\xD8\x80" }, 1.546 + { 0x000007FF, "\xDF\xBF" }, 1.547 + 1.548 + { 0x00000800, "\xE0\xA0\x80" }, 1.549 + { 0x00000801, "\xE0\xA0\x81" }, 1.550 + { 0x00000802, "\xE0\xA0\x82" }, 1.551 + { 0x00000804, "\xE0\xA0\x84" }, 1.552 + { 0x00000808, "\xE0\xA0\x88" }, 1.553 + { 0x00000810, "\xE0\xA0\x90" }, 1.554 + { 0x00000820, "\xE0\xA0\xA0" }, 1.555 + { 0x00000840, "\xE0\xA1\x80" }, 1.556 + { 0x00000880, "\xE0\xA2\x80" }, 1.557 + { 0x00000900, "\xE0\xA4\x80" }, 1.558 + { 0x00000A00, "\xE0\xA8\x80" }, 1.559 + { 0x00000C00, "\xE0\xB0\x80" }, 1.560 + { 0x00000FFF, "\xE0\xBF\xBF" }, 1.561 + { 0x00001000, "\xE1\x80\x80" }, 1.562 + { 0x00001001, "\xE1\x80\x81" }, 1.563 + { 0x00001002, "\xE1\x80\x82" }, 1.564 + { 0x00001004, "\xE1\x80\x84" }, 1.565 + { 0x00001008, "\xE1\x80\x88" }, 1.566 + { 0x00001010, "\xE1\x80\x90" }, 1.567 + { 0x00001020, "\xE1\x80\xA0" }, 1.568 + { 0x00001040, "\xE1\x81\x80" }, 1.569 + { 0x00001080, "\xE1\x82\x80" }, 1.570 + { 0x00001100, "\xE1\x84\x80" }, 1.571 + { 0x00001200, "\xE1\x88\x80" }, 1.572 + { 0x00001400, "\xE1\x90\x80" }, 1.573 + { 0x00001800, "\xE1\xA0\x80" }, 1.574 + { 0x00001FFF, "\xE1\xBF\xBF" }, 1.575 + { 0x00002000, "\xE2\x80\x80" }, 1.576 + { 0x00002001, "\xE2\x80\x81" }, 1.577 + { 0x00002002, "\xE2\x80\x82" }, 1.578 + { 0x00002004, "\xE2\x80\x84" }, 1.579 + { 0x00002008, "\xE2\x80\x88" }, 1.580 + { 0x00002010, "\xE2\x80\x90" }, 1.581 + { 0x00002020, "\xE2\x80\xA0" }, 1.582 + { 0x00002040, "\xE2\x81\x80" }, 1.583 + { 0x00002080, "\xE2\x82\x80" }, 1.584 + { 0x00002100, "\xE2\x84\x80" }, 1.585 + { 0x00002200, "\xE2\x88\x80" }, 1.586 + { 0x00002400, "\xE2\x90\x80" }, 1.587 + { 0x00002800, "\xE2\xA0\x80" }, 1.588 + { 0x00003000, "\xE3\x80\x80" }, 1.589 + { 0x00003FFF, "\xE3\xBF\xBF" }, 1.590 + { 0x00004000, "\xE4\x80\x80" }, 1.591 + { 0x00004001, "\xE4\x80\x81" }, 1.592 + { 0x00004002, "\xE4\x80\x82" }, 1.593 + { 0x00004004, "\xE4\x80\x84" }, 1.594 + { 0x00004008, "\xE4\x80\x88" }, 1.595 + { 0x00004010, "\xE4\x80\x90" }, 1.596 + { 0x00004020, "\xE4\x80\xA0" }, 1.597 + { 0x00004040, "\xE4\x81\x80" }, 1.598 + { 0x00004080, "\xE4\x82\x80" }, 1.599 + { 0x00004100, "\xE4\x84\x80" }, 1.600 + { 0x00004200, "\xE4\x88\x80" }, 1.601 + { 0x00004400, "\xE4\x90\x80" }, 1.602 + { 0x00004800, "\xE4\xA0\x80" }, 1.603 + { 0x00005000, "\xE5\x80\x80" }, 1.604 + { 0x00006000, "\xE6\x80\x80" }, 1.605 + { 0x00007FFF, "\xE7\xBF\xBF" }, 1.606 + { 0x00008000, "\xE8\x80\x80" }, 1.607 + { 0x00008001, "\xE8\x80\x81" }, 1.608 + { 0x00008002, "\xE8\x80\x82" }, 1.609 + { 0x00008004, "\xE8\x80\x84" }, 1.610 + { 0x00008008, "\xE8\x80\x88" }, 1.611 + { 0x00008010, "\xE8\x80\x90" }, 1.612 + { 0x00008020, "\xE8\x80\xA0" }, 1.613 + { 0x00008040, "\xE8\x81\x80" }, 1.614 + { 0x00008080, "\xE8\x82\x80" }, 1.615 + { 0x00008100, "\xE8\x84\x80" }, 1.616 + { 0x00008200, "\xE8\x88\x80" }, 1.617 + { 0x00008400, "\xE8\x90\x80" }, 1.618 + { 0x00008800, "\xE8\xA0\x80" }, 1.619 + { 0x00009000, "\xE9\x80\x80" }, 1.620 + { 0x0000A000, "\xEA\x80\x80" }, 1.621 + { 0x0000C000, "\xEC\x80\x80" }, 1.622 + { 0x0000FFFF, "\xEF\xBF\xBF" }, 1.623 + 1.624 + { 0x00010000, "\xF0\x90\x80\x80" }, 1.625 + { 0x00010001, "\xF0\x90\x80\x81" }, 1.626 + { 0x00010002, "\xF0\x90\x80\x82" }, 1.627 + { 0x00010004, "\xF0\x90\x80\x84" }, 1.628 + { 0x00010008, "\xF0\x90\x80\x88" }, 1.629 + { 0x00010010, "\xF0\x90\x80\x90" }, 1.630 + { 0x00010020, "\xF0\x90\x80\xA0" }, 1.631 + { 0x00010040, "\xF0\x90\x81\x80" }, 1.632 + { 0x00010080, "\xF0\x90\x82\x80" }, 1.633 + { 0x00010100, "\xF0\x90\x84\x80" }, 1.634 + { 0x00010200, "\xF0\x90\x88\x80" }, 1.635 + { 0x00010400, "\xF0\x90\x90\x80" }, 1.636 + { 0x00010800, "\xF0\x90\xA0\x80" }, 1.637 + { 0x00011000, "\xF0\x91\x80\x80" }, 1.638 + { 0x00012000, "\xF0\x92\x80\x80" }, 1.639 + { 0x00014000, "\xF0\x94\x80\x80" }, 1.640 + { 0x00018000, "\xF0\x98\x80\x80" }, 1.641 + { 0x0001FFFF, "\xF0\x9F\xBF\xBF" }, 1.642 + { 0x00020000, "\xF0\xA0\x80\x80" }, 1.643 + { 0x00020001, "\xF0\xA0\x80\x81" }, 1.644 + { 0x00020002, "\xF0\xA0\x80\x82" }, 1.645 + { 0x00020004, "\xF0\xA0\x80\x84" }, 1.646 + { 0x00020008, "\xF0\xA0\x80\x88" }, 1.647 + { 0x00020010, "\xF0\xA0\x80\x90" }, 1.648 + { 0x00020020, "\xF0\xA0\x80\xA0" }, 1.649 + { 0x00020040, "\xF0\xA0\x81\x80" }, 1.650 + { 0x00020080, "\xF0\xA0\x82\x80" }, 1.651 + { 0x00020100, "\xF0\xA0\x84\x80" }, 1.652 + { 0x00020200, "\xF0\xA0\x88\x80" }, 1.653 + { 0x00020400, "\xF0\xA0\x90\x80" }, 1.654 + { 0x00020800, "\xF0\xA0\xA0\x80" }, 1.655 + { 0x00021000, "\xF0\xA1\x80\x80" }, 1.656 + { 0x00022000, "\xF0\xA2\x80\x80" }, 1.657 + { 0x00024000, "\xF0\xA4\x80\x80" }, 1.658 + { 0x00028000, "\xF0\xA8\x80\x80" }, 1.659 + { 0x00030000, "\xF0\xB0\x80\x80" }, 1.660 + { 0x0003FFFF, "\xF0\xBF\xBF\xBF" }, 1.661 + { 0x00040000, "\xF1\x80\x80\x80" }, 1.662 + { 0x00040001, "\xF1\x80\x80\x81" }, 1.663 + { 0x00040002, "\xF1\x80\x80\x82" }, 1.664 + { 0x00040004, "\xF1\x80\x80\x84" }, 1.665 + { 0x00040008, "\xF1\x80\x80\x88" }, 1.666 + { 0x00040010, "\xF1\x80\x80\x90" }, 1.667 + { 0x00040020, "\xF1\x80\x80\xA0" }, 1.668 + { 0x00040040, "\xF1\x80\x81\x80" }, 1.669 + { 0x00040080, "\xF1\x80\x82\x80" }, 1.670 + { 0x00040100, "\xF1\x80\x84\x80" }, 1.671 + { 0x00040200, "\xF1\x80\x88\x80" }, 1.672 + { 0x00040400, "\xF1\x80\x90\x80" }, 1.673 + { 0x00040800, "\xF1\x80\xA0\x80" }, 1.674 + { 0x00041000, "\xF1\x81\x80\x80" }, 1.675 + { 0x00042000, "\xF1\x82\x80\x80" }, 1.676 + { 0x00044000, "\xF1\x84\x80\x80" }, 1.677 + { 0x00048000, "\xF1\x88\x80\x80" }, 1.678 + { 0x00050000, "\xF1\x90\x80\x80" }, 1.679 + { 0x00060000, "\xF1\xA0\x80\x80" }, 1.680 + { 0x0007FFFF, "\xF1\xBF\xBF\xBF" }, 1.681 + { 0x00080000, "\xF2\x80\x80\x80" }, 1.682 + { 0x00080001, "\xF2\x80\x80\x81" }, 1.683 + { 0x00080002, "\xF2\x80\x80\x82" }, 1.684 + { 0x00080004, "\xF2\x80\x80\x84" }, 1.685 + { 0x00080008, "\xF2\x80\x80\x88" }, 1.686 + { 0x00080010, "\xF2\x80\x80\x90" }, 1.687 + { 0x00080020, "\xF2\x80\x80\xA0" }, 1.688 + { 0x00080040, "\xF2\x80\x81\x80" }, 1.689 + { 0x00080080, "\xF2\x80\x82\x80" }, 1.690 + { 0x00080100, "\xF2\x80\x84\x80" }, 1.691 + { 0x00080200, "\xF2\x80\x88\x80" }, 1.692 + { 0x00080400, "\xF2\x80\x90\x80" }, 1.693 + { 0x00080800, "\xF2\x80\xA0\x80" }, 1.694 + { 0x00081000, "\xF2\x81\x80\x80" }, 1.695 + { 0x00082000, "\xF2\x82\x80\x80" }, 1.696 + { 0x00084000, "\xF2\x84\x80\x80" }, 1.697 + { 0x00088000, "\xF2\x88\x80\x80" }, 1.698 + { 0x00090000, "\xF2\x90\x80\x80" }, 1.699 + { 0x000A0000, "\xF2\xA0\x80\x80" }, 1.700 + { 0x000C0000, "\xF3\x80\x80\x80" }, 1.701 + { 0x000FFFFF, "\xF3\xBF\xBF\xBF" }, 1.702 + { 0x00100000, "\xF4\x80\x80\x80" }, 1.703 + { 0x00100001, "\xF4\x80\x80\x81" }, 1.704 + { 0x00100002, "\xF4\x80\x80\x82" }, 1.705 + { 0x00100004, "\xF4\x80\x80\x84" }, 1.706 + { 0x00100008, "\xF4\x80\x80\x88" }, 1.707 + { 0x00100010, "\xF4\x80\x80\x90" }, 1.708 + { 0x00100020, "\xF4\x80\x80\xA0" }, 1.709 + { 0x00100040, "\xF4\x80\x81\x80" }, 1.710 + { 0x00100080, "\xF4\x80\x82\x80" }, 1.711 + { 0x00100100, "\xF4\x80\x84\x80" }, 1.712 + { 0x00100200, "\xF4\x80\x88\x80" }, 1.713 + { 0x00100400, "\xF4\x80\x90\x80" }, 1.714 + { 0x00100800, "\xF4\x80\xA0\x80" }, 1.715 + { 0x00101000, "\xF4\x81\x80\x80" }, 1.716 + { 0x00102000, "\xF4\x82\x80\x80" }, 1.717 + { 0x00104000, "\xF4\x84\x80\x80" }, 1.718 + { 0x00108000, "\xF4\x88\x80\x80" }, 1.719 + { 0x0010FFFF, "\xF4\x8F\xBF\xBF" }, 1.720 +}; 1.721 + 1.722 +/* 1.723 + * UCS-2 vectors 1.724 + */ 1.725 + 1.726 +struct ucs2 ucs2[] = { 1.727 + { 0x0001, "\x01" }, 1.728 + { 0x0002, "\x02" }, 1.729 + { 0x0003, "\x03" }, 1.730 + { 0x0004, "\x04" }, 1.731 + { 0x0007, "\x07" }, 1.732 + { 0x0008, "\x08" }, 1.733 + { 0x000F, "\x0F" }, 1.734 + { 0x0010, "\x10" }, 1.735 + { 0x001F, "\x1F" }, 1.736 + { 0x0020, "\x20" }, 1.737 + { 0x003F, "\x3F" }, 1.738 + { 0x0040, "\x40" }, 1.739 + { 0x007F, "\x7F" }, 1.740 + 1.741 + { 0x0080, "\xC2\x80" }, 1.742 + { 0x0081, "\xC2\x81" }, 1.743 + { 0x0082, "\xC2\x82" }, 1.744 + { 0x0084, "\xC2\x84" }, 1.745 + { 0x0088, "\xC2\x88" }, 1.746 + { 0x0090, "\xC2\x90" }, 1.747 + { 0x00A0, "\xC2\xA0" }, 1.748 + { 0x00C0, "\xC3\x80" }, 1.749 + { 0x00FF, "\xC3\xBF" }, 1.750 + { 0x0100, "\xC4\x80" }, 1.751 + { 0x0101, "\xC4\x81" }, 1.752 + { 0x0102, "\xC4\x82" }, 1.753 + { 0x0104, "\xC4\x84" }, 1.754 + { 0x0108, "\xC4\x88" }, 1.755 + { 0x0110, "\xC4\x90" }, 1.756 + { 0x0120, "\xC4\xA0" }, 1.757 + { 0x0140, "\xC5\x80" }, 1.758 + { 0x0180, "\xC6\x80" }, 1.759 + { 0x01FF, "\xC7\xBF" }, 1.760 + { 0x0200, "\xC8\x80" }, 1.761 + { 0x0201, "\xC8\x81" }, 1.762 + { 0x0202, "\xC8\x82" }, 1.763 + { 0x0204, "\xC8\x84" }, 1.764 + { 0x0208, "\xC8\x88" }, 1.765 + { 0x0210, "\xC8\x90" }, 1.766 + { 0x0220, "\xC8\xA0" }, 1.767 + { 0x0240, "\xC9\x80" }, 1.768 + { 0x0280, "\xCA\x80" }, 1.769 + { 0x0300, "\xCC\x80" }, 1.770 + { 0x03FF, "\xCF\xBF" }, 1.771 + { 0x0400, "\xD0\x80" }, 1.772 + { 0x0401, "\xD0\x81" }, 1.773 + { 0x0402, "\xD0\x82" }, 1.774 + { 0x0404, "\xD0\x84" }, 1.775 + { 0x0408, "\xD0\x88" }, 1.776 + { 0x0410, "\xD0\x90" }, 1.777 + { 0x0420, "\xD0\xA0" }, 1.778 + { 0x0440, "\xD1\x80" }, 1.779 + { 0x0480, "\xD2\x80" }, 1.780 + { 0x0500, "\xD4\x80" }, 1.781 + { 0x0600, "\xD8\x80" }, 1.782 + { 0x07FF, "\xDF\xBF" }, 1.783 + 1.784 + { 0x0800, "\xE0\xA0\x80" }, 1.785 + { 0x0801, "\xE0\xA0\x81" }, 1.786 + { 0x0802, "\xE0\xA0\x82" }, 1.787 + { 0x0804, "\xE0\xA0\x84" }, 1.788 + { 0x0808, "\xE0\xA0\x88" }, 1.789 + { 0x0810, "\xE0\xA0\x90" }, 1.790 + { 0x0820, "\xE0\xA0\xA0" }, 1.791 + { 0x0840, "\xE0\xA1\x80" }, 1.792 + { 0x0880, "\xE0\xA2\x80" }, 1.793 + { 0x0900, "\xE0\xA4\x80" }, 1.794 + { 0x0A00, "\xE0\xA8\x80" }, 1.795 + { 0x0C00, "\xE0\xB0\x80" }, 1.796 + { 0x0FFF, "\xE0\xBF\xBF" }, 1.797 + { 0x1000, "\xE1\x80\x80" }, 1.798 + { 0x1001, "\xE1\x80\x81" }, 1.799 + { 0x1002, "\xE1\x80\x82" }, 1.800 + { 0x1004, "\xE1\x80\x84" }, 1.801 + { 0x1008, "\xE1\x80\x88" }, 1.802 + { 0x1010, "\xE1\x80\x90" }, 1.803 + { 0x1020, "\xE1\x80\xA0" }, 1.804 + { 0x1040, "\xE1\x81\x80" }, 1.805 + { 0x1080, "\xE1\x82\x80" }, 1.806 + { 0x1100, "\xE1\x84\x80" }, 1.807 + { 0x1200, "\xE1\x88\x80" }, 1.808 + { 0x1400, "\xE1\x90\x80" }, 1.809 + { 0x1800, "\xE1\xA0\x80" }, 1.810 + { 0x1FFF, "\xE1\xBF\xBF" }, 1.811 + { 0x2000, "\xE2\x80\x80" }, 1.812 + { 0x2001, "\xE2\x80\x81" }, 1.813 + { 0x2002, "\xE2\x80\x82" }, 1.814 + { 0x2004, "\xE2\x80\x84" }, 1.815 + { 0x2008, "\xE2\x80\x88" }, 1.816 + { 0x2010, "\xE2\x80\x90" }, 1.817 + { 0x2020, "\xE2\x80\xA0" }, 1.818 + { 0x2040, "\xE2\x81\x80" }, 1.819 + { 0x2080, "\xE2\x82\x80" }, 1.820 + { 0x2100, "\xE2\x84\x80" }, 1.821 + { 0x2200, "\xE2\x88\x80" }, 1.822 + { 0x2400, "\xE2\x90\x80" }, 1.823 + { 0x2800, "\xE2\xA0\x80" }, 1.824 + { 0x3000, "\xE3\x80\x80" }, 1.825 + { 0x3FFF, "\xE3\xBF\xBF" }, 1.826 + { 0x4000, "\xE4\x80\x80" }, 1.827 + { 0x4001, "\xE4\x80\x81" }, 1.828 + { 0x4002, "\xE4\x80\x82" }, 1.829 + { 0x4004, "\xE4\x80\x84" }, 1.830 + { 0x4008, "\xE4\x80\x88" }, 1.831 + { 0x4010, "\xE4\x80\x90" }, 1.832 + { 0x4020, "\xE4\x80\xA0" }, 1.833 + { 0x4040, "\xE4\x81\x80" }, 1.834 + { 0x4080, "\xE4\x82\x80" }, 1.835 + { 0x4100, "\xE4\x84\x80" }, 1.836 + { 0x4200, "\xE4\x88\x80" }, 1.837 + { 0x4400, "\xE4\x90\x80" }, 1.838 + { 0x4800, "\xE4\xA0\x80" }, 1.839 + { 0x5000, "\xE5\x80\x80" }, 1.840 + { 0x6000, "\xE6\x80\x80" }, 1.841 + { 0x7FFF, "\xE7\xBF\xBF" }, 1.842 + { 0x8000, "\xE8\x80\x80" }, 1.843 + { 0x8001, "\xE8\x80\x81" }, 1.844 + { 0x8002, "\xE8\x80\x82" }, 1.845 + { 0x8004, "\xE8\x80\x84" }, 1.846 + { 0x8008, "\xE8\x80\x88" }, 1.847 + { 0x8010, "\xE8\x80\x90" }, 1.848 + { 0x8020, "\xE8\x80\xA0" }, 1.849 + { 0x8040, "\xE8\x81\x80" }, 1.850 + { 0x8080, "\xE8\x82\x80" }, 1.851 + { 0x8100, "\xE8\x84\x80" }, 1.852 + { 0x8200, "\xE8\x88\x80" }, 1.853 + { 0x8400, "\xE8\x90\x80" }, 1.854 + { 0x8800, "\xE8\xA0\x80" }, 1.855 + { 0x9000, "\xE9\x80\x80" }, 1.856 + { 0xA000, "\xEA\x80\x80" }, 1.857 + { 0xC000, "\xEC\x80\x80" }, 1.858 + { 0xFFFF, "\xEF\xBF\xBF" } 1.859 + 1.860 +}; 1.861 + 1.862 +/* 1.863 + * UTF-16 vectors 1.864 + */ 1.865 + 1.866 +struct utf16 utf16[] = { 1.867 + { 0x00010000, { 0xD800, 0xDC00 } }, 1.868 + { 0x00010001, { 0xD800, 0xDC01 } }, 1.869 + { 0x00010002, { 0xD800, 0xDC02 } }, 1.870 + { 0x00010003, { 0xD800, 0xDC03 } }, 1.871 + { 0x00010004, { 0xD800, 0xDC04 } }, 1.872 + { 0x00010007, { 0xD800, 0xDC07 } }, 1.873 + { 0x00010008, { 0xD800, 0xDC08 } }, 1.874 + { 0x0001000F, { 0xD800, 0xDC0F } }, 1.875 + { 0x00010010, { 0xD800, 0xDC10 } }, 1.876 + { 0x0001001F, { 0xD800, 0xDC1F } }, 1.877 + { 0x00010020, { 0xD800, 0xDC20 } }, 1.878 + { 0x0001003F, { 0xD800, 0xDC3F } }, 1.879 + { 0x00010040, { 0xD800, 0xDC40 } }, 1.880 + { 0x0001007F, { 0xD800, 0xDC7F } }, 1.881 + { 0x00010080, { 0xD800, 0xDC80 } }, 1.882 + { 0x00010081, { 0xD800, 0xDC81 } }, 1.883 + { 0x00010082, { 0xD800, 0xDC82 } }, 1.884 + { 0x00010084, { 0xD800, 0xDC84 } }, 1.885 + { 0x00010088, { 0xD800, 0xDC88 } }, 1.886 + { 0x00010090, { 0xD800, 0xDC90 } }, 1.887 + { 0x000100A0, { 0xD800, 0xDCA0 } }, 1.888 + { 0x000100C0, { 0xD800, 0xDCC0 } }, 1.889 + { 0x000100FF, { 0xD800, 0xDCFF } }, 1.890 + { 0x00010100, { 0xD800, 0xDD00 } }, 1.891 + { 0x00010101, { 0xD800, 0xDD01 } }, 1.892 + { 0x00010102, { 0xD800, 0xDD02 } }, 1.893 + { 0x00010104, { 0xD800, 0xDD04 } }, 1.894 + { 0x00010108, { 0xD800, 0xDD08 } }, 1.895 + { 0x00010110, { 0xD800, 0xDD10 } }, 1.896 + { 0x00010120, { 0xD800, 0xDD20 } }, 1.897 + { 0x00010140, { 0xD800, 0xDD40 } }, 1.898 + { 0x00010180, { 0xD800, 0xDD80 } }, 1.899 + { 0x000101FF, { 0xD800, 0xDDFF } }, 1.900 + { 0x00010200, { 0xD800, 0xDE00 } }, 1.901 + { 0x00010201, { 0xD800, 0xDE01 } }, 1.902 + { 0x00010202, { 0xD800, 0xDE02 } }, 1.903 + { 0x00010204, { 0xD800, 0xDE04 } }, 1.904 + { 0x00010208, { 0xD800, 0xDE08 } }, 1.905 + { 0x00010210, { 0xD800, 0xDE10 } }, 1.906 + { 0x00010220, { 0xD800, 0xDE20 } }, 1.907 + { 0x00010240, { 0xD800, 0xDE40 } }, 1.908 + { 0x00010280, { 0xD800, 0xDE80 } }, 1.909 + { 0x00010300, { 0xD800, 0xDF00 } }, 1.910 + { 0x000103FF, { 0xD800, 0xDFFF } }, 1.911 + { 0x00010400, { 0xD801, 0xDC00 } }, 1.912 + { 0x00010401, { 0xD801, 0xDC01 } }, 1.913 + { 0x00010402, { 0xD801, 0xDC02 } }, 1.914 + { 0x00010404, { 0xD801, 0xDC04 } }, 1.915 + { 0x00010408, { 0xD801, 0xDC08 } }, 1.916 + { 0x00010410, { 0xD801, 0xDC10 } }, 1.917 + { 0x00010420, { 0xD801, 0xDC20 } }, 1.918 + { 0x00010440, { 0xD801, 0xDC40 } }, 1.919 + { 0x00010480, { 0xD801, 0xDC80 } }, 1.920 + { 0x00010500, { 0xD801, 0xDD00 } }, 1.921 + { 0x00010600, { 0xD801, 0xDE00 } }, 1.922 + { 0x000107FF, { 0xD801, 0xDFFF } }, 1.923 + { 0x00010800, { 0xD802, 0xDC00 } }, 1.924 + { 0x00010801, { 0xD802, 0xDC01 } }, 1.925 + { 0x00010802, { 0xD802, 0xDC02 } }, 1.926 + { 0x00010804, { 0xD802, 0xDC04 } }, 1.927 + { 0x00010808, { 0xD802, 0xDC08 } }, 1.928 + { 0x00010810, { 0xD802, 0xDC10 } }, 1.929 + { 0x00010820, { 0xD802, 0xDC20 } }, 1.930 + { 0x00010840, { 0xD802, 0xDC40 } }, 1.931 + { 0x00010880, { 0xD802, 0xDC80 } }, 1.932 + { 0x00010900, { 0xD802, 0xDD00 } }, 1.933 + { 0x00010A00, { 0xD802, 0xDE00 } }, 1.934 + { 0x00010C00, { 0xD803, 0xDC00 } }, 1.935 + { 0x00010FFF, { 0xD803, 0xDFFF } }, 1.936 + { 0x00011000, { 0xD804, 0xDC00 } }, 1.937 + { 0x00011001, { 0xD804, 0xDC01 } }, 1.938 + { 0x00011002, { 0xD804, 0xDC02 } }, 1.939 + { 0x00011004, { 0xD804, 0xDC04 } }, 1.940 + { 0x00011008, { 0xD804, 0xDC08 } }, 1.941 + { 0x00011010, { 0xD804, 0xDC10 } }, 1.942 + { 0x00011020, { 0xD804, 0xDC20 } }, 1.943 + { 0x00011040, { 0xD804, 0xDC40 } }, 1.944 + { 0x00011080, { 0xD804, 0xDC80 } }, 1.945 + { 0x00011100, { 0xD804, 0xDD00 } }, 1.946 + { 0x00011200, { 0xD804, 0xDE00 } }, 1.947 + { 0x00011400, { 0xD805, 0xDC00 } }, 1.948 + { 0x00011800, { 0xD806, 0xDC00 } }, 1.949 + { 0x00011FFF, { 0xD807, 0xDFFF } }, 1.950 + { 0x00012000, { 0xD808, 0xDC00 } }, 1.951 + { 0x00012001, { 0xD808, 0xDC01 } }, 1.952 + { 0x00012002, { 0xD808, 0xDC02 } }, 1.953 + { 0x00012004, { 0xD808, 0xDC04 } }, 1.954 + { 0x00012008, { 0xD808, 0xDC08 } }, 1.955 + { 0x00012010, { 0xD808, 0xDC10 } }, 1.956 + { 0x00012020, { 0xD808, 0xDC20 } }, 1.957 + { 0x00012040, { 0xD808, 0xDC40 } }, 1.958 + { 0x00012080, { 0xD808, 0xDC80 } }, 1.959 + { 0x00012100, { 0xD808, 0xDD00 } }, 1.960 + { 0x00012200, { 0xD808, 0xDE00 } }, 1.961 + { 0x00012400, { 0xD809, 0xDC00 } }, 1.962 + { 0x00012800, { 0xD80A, 0xDC00 } }, 1.963 + { 0x00013000, { 0xD80C, 0xDC00 } }, 1.964 + { 0x00013FFF, { 0xD80F, 0xDFFF } }, 1.965 + { 0x00014000, { 0xD810, 0xDC00 } }, 1.966 + { 0x00014001, { 0xD810, 0xDC01 } }, 1.967 + { 0x00014002, { 0xD810, 0xDC02 } }, 1.968 + { 0x00014004, { 0xD810, 0xDC04 } }, 1.969 + { 0x00014008, { 0xD810, 0xDC08 } }, 1.970 + { 0x00014010, { 0xD810, 0xDC10 } }, 1.971 + { 0x00014020, { 0xD810, 0xDC20 } }, 1.972 + { 0x00014040, { 0xD810, 0xDC40 } }, 1.973 + { 0x00014080, { 0xD810, 0xDC80 } }, 1.974 + { 0x00014100, { 0xD810, 0xDD00 } }, 1.975 + { 0x00014200, { 0xD810, 0xDE00 } }, 1.976 + { 0x00014400, { 0xD811, 0xDC00 } }, 1.977 + { 0x00014800, { 0xD812, 0xDC00 } }, 1.978 + { 0x00015000, { 0xD814, 0xDC00 } }, 1.979 + { 0x00016000, { 0xD818, 0xDC00 } }, 1.980 + { 0x00017FFF, { 0xD81F, 0xDFFF } }, 1.981 + { 0x00018000, { 0xD820, 0xDC00 } }, 1.982 + { 0x00018001, { 0xD820, 0xDC01 } }, 1.983 + { 0x00018002, { 0xD820, 0xDC02 } }, 1.984 + { 0x00018004, { 0xD820, 0xDC04 } }, 1.985 + { 0x00018008, { 0xD820, 0xDC08 } }, 1.986 + { 0x00018010, { 0xD820, 0xDC10 } }, 1.987 + { 0x00018020, { 0xD820, 0xDC20 } }, 1.988 + { 0x00018040, { 0xD820, 0xDC40 } }, 1.989 + { 0x00018080, { 0xD820, 0xDC80 } }, 1.990 + { 0x00018100, { 0xD820, 0xDD00 } }, 1.991 + { 0x00018200, { 0xD820, 0xDE00 } }, 1.992 + { 0x00018400, { 0xD821, 0xDC00 } }, 1.993 + { 0x00018800, { 0xD822, 0xDC00 } }, 1.994 + { 0x00019000, { 0xD824, 0xDC00 } }, 1.995 + { 0x0001A000, { 0xD828, 0xDC00 } }, 1.996 + { 0x0001C000, { 0xD830, 0xDC00 } }, 1.997 + { 0x0001FFFF, { 0xD83F, 0xDFFF } }, 1.998 + { 0x00020000, { 0xD840, 0xDC00 } }, 1.999 + { 0x00020001, { 0xD840, 0xDC01 } }, 1.1000 + { 0x00020002, { 0xD840, 0xDC02 } }, 1.1001 + { 0x00020004, { 0xD840, 0xDC04 } }, 1.1002 + { 0x00020008, { 0xD840, 0xDC08 } }, 1.1003 + { 0x00020010, { 0xD840, 0xDC10 } }, 1.1004 + { 0x00020020, { 0xD840, 0xDC20 } }, 1.1005 + { 0x00020040, { 0xD840, 0xDC40 } }, 1.1006 + { 0x00020080, { 0xD840, 0xDC80 } }, 1.1007 + { 0x00020100, { 0xD840, 0xDD00 } }, 1.1008 + { 0x00020200, { 0xD840, 0xDE00 } }, 1.1009 + { 0x00020400, { 0xD841, 0xDC00 } }, 1.1010 + { 0x00020800, { 0xD842, 0xDC00 } }, 1.1011 + { 0x00021000, { 0xD844, 0xDC00 } }, 1.1012 + { 0x00022000, { 0xD848, 0xDC00 } }, 1.1013 + { 0x00024000, { 0xD850, 0xDC00 } }, 1.1014 + { 0x00028000, { 0xD860, 0xDC00 } }, 1.1015 + { 0x0002FFFF, { 0xD87F, 0xDFFF } }, 1.1016 + { 0x00030000, { 0xD880, 0xDC00 } }, 1.1017 + { 0x00030001, { 0xD880, 0xDC01 } }, 1.1018 + { 0x00030002, { 0xD880, 0xDC02 } }, 1.1019 + { 0x00030004, { 0xD880, 0xDC04 } }, 1.1020 + { 0x00030008, { 0xD880, 0xDC08 } }, 1.1021 + { 0x00030010, { 0xD880, 0xDC10 } }, 1.1022 + { 0x00030020, { 0xD880, 0xDC20 } }, 1.1023 + { 0x00030040, { 0xD880, 0xDC40 } }, 1.1024 + { 0x00030080, { 0xD880, 0xDC80 } }, 1.1025 + { 0x00030100, { 0xD880, 0xDD00 } }, 1.1026 + { 0x00030200, { 0xD880, 0xDE00 } }, 1.1027 + { 0x00030400, { 0xD881, 0xDC00 } }, 1.1028 + { 0x00030800, { 0xD882, 0xDC00 } }, 1.1029 + { 0x00031000, { 0xD884, 0xDC00 } }, 1.1030 + { 0x00032000, { 0xD888, 0xDC00 } }, 1.1031 + { 0x00034000, { 0xD890, 0xDC00 } }, 1.1032 + { 0x00038000, { 0xD8A0, 0xDC00 } }, 1.1033 + { 0x0003FFFF, { 0xD8BF, 0xDFFF } }, 1.1034 + { 0x00040000, { 0xD8C0, 0xDC00 } }, 1.1035 + { 0x00040001, { 0xD8C0, 0xDC01 } }, 1.1036 + { 0x00040002, { 0xD8C0, 0xDC02 } }, 1.1037 + { 0x00040004, { 0xD8C0, 0xDC04 } }, 1.1038 + { 0x00040008, { 0xD8C0, 0xDC08 } }, 1.1039 + { 0x00040010, { 0xD8C0, 0xDC10 } }, 1.1040 + { 0x00040020, { 0xD8C0, 0xDC20 } }, 1.1041 + { 0x00040040, { 0xD8C0, 0xDC40 } }, 1.1042 + { 0x00040080, { 0xD8C0, 0xDC80 } }, 1.1043 + { 0x00040100, { 0xD8C0, 0xDD00 } }, 1.1044 + { 0x00040200, { 0xD8C0, 0xDE00 } }, 1.1045 + { 0x00040400, { 0xD8C1, 0xDC00 } }, 1.1046 + { 0x00040800, { 0xD8C2, 0xDC00 } }, 1.1047 + { 0x00041000, { 0xD8C4, 0xDC00 } }, 1.1048 + { 0x00042000, { 0xD8C8, 0xDC00 } }, 1.1049 + { 0x00044000, { 0xD8D0, 0xDC00 } }, 1.1050 + { 0x00048000, { 0xD8E0, 0xDC00 } }, 1.1051 + { 0x0004FFFF, { 0xD8FF, 0xDFFF } }, 1.1052 + { 0x00050000, { 0xD900, 0xDC00 } }, 1.1053 + { 0x00050001, { 0xD900, 0xDC01 } }, 1.1054 + { 0x00050002, { 0xD900, 0xDC02 } }, 1.1055 + { 0x00050004, { 0xD900, 0xDC04 } }, 1.1056 + { 0x00050008, { 0xD900, 0xDC08 } }, 1.1057 + { 0x00050010, { 0xD900, 0xDC10 } }, 1.1058 + { 0x00050020, { 0xD900, 0xDC20 } }, 1.1059 + { 0x00050040, { 0xD900, 0xDC40 } }, 1.1060 + { 0x00050080, { 0xD900, 0xDC80 } }, 1.1061 + { 0x00050100, { 0xD900, 0xDD00 } }, 1.1062 + { 0x00050200, { 0xD900, 0xDE00 } }, 1.1063 + { 0x00050400, { 0xD901, 0xDC00 } }, 1.1064 + { 0x00050800, { 0xD902, 0xDC00 } }, 1.1065 + { 0x00051000, { 0xD904, 0xDC00 } }, 1.1066 + { 0x00052000, { 0xD908, 0xDC00 } }, 1.1067 + { 0x00054000, { 0xD910, 0xDC00 } }, 1.1068 + { 0x00058000, { 0xD920, 0xDC00 } }, 1.1069 + { 0x00060000, { 0xD940, 0xDC00 } }, 1.1070 + { 0x00070000, { 0xD980, 0xDC00 } }, 1.1071 + { 0x0007FFFF, { 0xD9BF, 0xDFFF } }, 1.1072 + { 0x00080000, { 0xD9C0, 0xDC00 } }, 1.1073 + { 0x00080001, { 0xD9C0, 0xDC01 } }, 1.1074 + { 0x00080002, { 0xD9C0, 0xDC02 } }, 1.1075 + { 0x00080004, { 0xD9C0, 0xDC04 } }, 1.1076 + { 0x00080008, { 0xD9C0, 0xDC08 } }, 1.1077 + { 0x00080010, { 0xD9C0, 0xDC10 } }, 1.1078 + { 0x00080020, { 0xD9C0, 0xDC20 } }, 1.1079 + { 0x00080040, { 0xD9C0, 0xDC40 } }, 1.1080 + { 0x00080080, { 0xD9C0, 0xDC80 } }, 1.1081 + { 0x00080100, { 0xD9C0, 0xDD00 } }, 1.1082 + { 0x00080200, { 0xD9C0, 0xDE00 } }, 1.1083 + { 0x00080400, { 0xD9C1, 0xDC00 } }, 1.1084 + { 0x00080800, { 0xD9C2, 0xDC00 } }, 1.1085 + { 0x00081000, { 0xD9C4, 0xDC00 } }, 1.1086 + { 0x00082000, { 0xD9C8, 0xDC00 } }, 1.1087 + { 0x00084000, { 0xD9D0, 0xDC00 } }, 1.1088 + { 0x00088000, { 0xD9E0, 0xDC00 } }, 1.1089 + { 0x0008FFFF, { 0xD9FF, 0xDFFF } }, 1.1090 + { 0x00090000, { 0xDA00, 0xDC00 } }, 1.1091 + { 0x00090001, { 0xDA00, 0xDC01 } }, 1.1092 + { 0x00090002, { 0xDA00, 0xDC02 } }, 1.1093 + { 0x00090004, { 0xDA00, 0xDC04 } }, 1.1094 + { 0x00090008, { 0xDA00, 0xDC08 } }, 1.1095 + { 0x00090010, { 0xDA00, 0xDC10 } }, 1.1096 + { 0x00090020, { 0xDA00, 0xDC20 } }, 1.1097 + { 0x00090040, { 0xDA00, 0xDC40 } }, 1.1098 + { 0x00090080, { 0xDA00, 0xDC80 } }, 1.1099 + { 0x00090100, { 0xDA00, 0xDD00 } }, 1.1100 + { 0x00090200, { 0xDA00, 0xDE00 } }, 1.1101 + { 0x00090400, { 0xDA01, 0xDC00 } }, 1.1102 + { 0x00090800, { 0xDA02, 0xDC00 } }, 1.1103 + { 0x00091000, { 0xDA04, 0xDC00 } }, 1.1104 + { 0x00092000, { 0xDA08, 0xDC00 } }, 1.1105 + { 0x00094000, { 0xDA10, 0xDC00 } }, 1.1106 + { 0x00098000, { 0xDA20, 0xDC00 } }, 1.1107 + { 0x000A0000, { 0xDA40, 0xDC00 } }, 1.1108 + { 0x000B0000, { 0xDA80, 0xDC00 } }, 1.1109 + { 0x000C0000, { 0xDAC0, 0xDC00 } }, 1.1110 + { 0x000D0000, { 0xDB00, 0xDC00 } }, 1.1111 + { 0x000FFFFF, { 0xDBBF, 0xDFFF } }, 1.1112 + { 0x0010FFFF, { 0xDBFF, 0xDFFF } } 1.1113 + 1.1114 +}; 1.1115 + 1.1116 +/* illegal utf8 sequences */ 1.1117 +char *utf8_bad[] = { 1.1118 + "\xC0\x80", 1.1119 + "\xC1\xBF", 1.1120 + "\xE0\x80\x80", 1.1121 + "\xE0\x9F\xBF", 1.1122 + "\xF0\x80\x80\x80", 1.1123 + "\xF0\x8F\xBF\xBF", 1.1124 + "\xF4\x90\x80\x80", 1.1125 + "\xF7\xBF\xBF\xBF", 1.1126 + "\xF8\x80\x80\x80\x80", 1.1127 + "\xF8\x88\x80\x80\x80", 1.1128 + "\xF8\x92\x80\x80\x80", 1.1129 + "\xF8\x9F\xBF\xBF\xBF", 1.1130 + "\xF8\xA0\x80\x80\x80", 1.1131 + "\xF8\xA8\x80\x80\x80", 1.1132 + "\xF8\xB0\x80\x80\x80", 1.1133 + "\xF8\xBF\xBF\xBF\xBF", 1.1134 + "\xF9\x80\x80\x80\x88", 1.1135 + "\xF9\x84\x80\x80\x80", 1.1136 + "\xF9\xBF\xBF\xBF\xBF", 1.1137 + "\xFA\x80\x80\x80\x80", 1.1138 + "\xFA\x90\x80\x80\x80", 1.1139 + "\xFB\xBF\xBF\xBF\xBF", 1.1140 + "\xFC\x84\x80\x80\x80\x81", 1.1141 + "\xFC\x85\x80\x80\x80\x80", 1.1142 + "\xFC\x86\x80\x80\x80\x80", 1.1143 + "\xFC\x87\xBF\xBF\xBF\xBF", 1.1144 + "\xFC\x88\xA0\x80\x80\x80", 1.1145 + "\xFC\x89\x80\x80\x80\x80", 1.1146 + "\xFC\x8A\x80\x80\x80\x80", 1.1147 + "\xFC\x90\x80\x80\x80\x82", 1.1148 + "\xFD\x80\x80\x80\x80\x80", 1.1149 + "\xFD\xBF\xBF\xBF\xBF\xBF", 1.1150 + "\x80", 1.1151 + "\xC3", 1.1152 + "\xC3\xC3\x80", 1.1153 + "\xED\xA0\x80", 1.1154 + "\xED\xBF\x80", 1.1155 + "\xED\xBF\xBF", 1.1156 + "\xED\xA0\x80\xE0\xBF\xBF", 1.1157 +}; 1.1158 + 1.1159 +static void 1.1160 +dump_utf8 1.1161 +( 1.1162 + char *word, 1.1163 + unsigned char *utf8, 1.1164 + char *end 1.1165 +) 1.1166 +{ 1.1167 + fprintf(stdout, "%s ", word); 1.1168 + for( ; *utf8; utf8++ ) { 1.1169 + fprintf(stdout, "%02.2x ", (unsigned int)*utf8); 1.1170 + } 1.1171 + fprintf(stdout, "%s", end); 1.1172 +} 1.1173 + 1.1174 +static PRBool 1.1175 +test_ucs4_chars 1.1176 +( 1.1177 + void 1.1178 +) 1.1179 +{ 1.1180 + PRBool rv = PR_TRUE; 1.1181 + int i; 1.1182 + 1.1183 + for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { 1.1184 + struct ucs4 *e = &ucs4[i]; 1.1185 + PRBool result; 1.1186 + unsigned char utf8[8]; 1.1187 + unsigned int len = 0; 1.1188 + PRUint32 back = 0; 1.1189 + 1.1190 + (void)memset(utf8, 0, sizeof(utf8)); 1.1191 + 1.1192 + result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 1.1193 + (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); 1.1194 + 1.1195 + if( !result ) { 1.1196 + fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8\n", e->c); 1.1197 + rv = PR_FALSE; 1.1198 + continue; 1.1199 + } 1.1200 + 1.1201 + if( (len >= sizeof(utf8)) || 1.1202 + (strlen(e->utf8) != len) || 1.1203 + (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { 1.1204 + fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8: ", e->c); 1.1205 + dump_utf8("expected", e->utf8, ", "); 1.1206 + dump_utf8("received", utf8, "\n"); 1.1207 + rv = PR_FALSE; 1.1208 + continue; 1.1209 + } 1.1210 + 1.1211 + result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, 1.1212 + utf8, len, (unsigned char *)&back, sizeof(back), &len); 1.1213 + 1.1214 + if( !result ) { 1.1215 + dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4\n"); 1.1216 + rv = PR_FALSE; 1.1217 + continue; 1.1218 + } 1.1219 + 1.1220 + if( (sizeof(back) != len) || (e->c != back) ) { 1.1221 + dump_utf8("Wrong conversion of UTF-8", utf8, " to UCS-4:"); 1.1222 + fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); 1.1223 + rv = PR_FALSE; 1.1224 + continue; 1.1225 + } 1.1226 + } 1.1227 + 1.1228 + return rv; 1.1229 +} 1.1230 + 1.1231 +static PRBool 1.1232 +test_ucs2_chars 1.1233 +( 1.1234 + void 1.1235 +) 1.1236 +{ 1.1237 + PRBool rv = PR_TRUE; 1.1238 + int i; 1.1239 + 1.1240 + for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { 1.1241 + struct ucs2 *e = &ucs2[i]; 1.1242 + PRBool result; 1.1243 + unsigned char utf8[8]; 1.1244 + unsigned int len = 0; 1.1245 + PRUint16 back = 0; 1.1246 + 1.1247 + (void)memset(utf8, 0, sizeof(utf8)); 1.1248 + 1.1249 + result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 1.1250 + (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); 1.1251 + 1.1252 + if( !result ) { 1.1253 + fprintf(stdout, "Failed to convert UCS-2 0x%04.4x to UTF-8\n", e->c); 1.1254 + rv = PR_FALSE; 1.1255 + continue; 1.1256 + } 1.1257 + 1.1258 + if( (len >= sizeof(utf8)) || 1.1259 + (strlen(e->utf8) != len) || 1.1260 + (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { 1.1261 + fprintf(stdout, "Wrong conversion of UCS-2 0x%04.4x to UTF-8: ", e->c); 1.1262 + dump_utf8("expected", e->utf8, ", "); 1.1263 + dump_utf8("received", utf8, "\n"); 1.1264 + rv = PR_FALSE; 1.1265 + continue; 1.1266 + } 1.1267 + 1.1268 + result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, 1.1269 + utf8, len, (unsigned char *)&back, sizeof(back), &len); 1.1270 + 1.1271 + if( !result ) { 1.1272 + dump_utf8("Failed to convert UTF-8", utf8, "to UCS-2\n"); 1.1273 + rv = PR_FALSE; 1.1274 + continue; 1.1275 + } 1.1276 + 1.1277 + if( (sizeof(back) != len) || (e->c != back) ) { 1.1278 + dump_utf8("Wrong conversion of UTF-8", utf8, "to UCS-2:"); 1.1279 + fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back); 1.1280 + rv = PR_FALSE; 1.1281 + continue; 1.1282 + } 1.1283 + } 1.1284 + 1.1285 + return rv; 1.1286 +} 1.1287 + 1.1288 +static PRBool 1.1289 +test_utf16_chars 1.1290 +( 1.1291 + void 1.1292 +) 1.1293 +{ 1.1294 + PRBool rv = PR_TRUE; 1.1295 + int i; 1.1296 + 1.1297 + for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { 1.1298 + struct utf16 *e = &utf16[i]; 1.1299 + PRBool result; 1.1300 + unsigned char utf8[8]; 1.1301 + unsigned int len = 0; 1.1302 + PRUint32 back32 = 0; 1.1303 + PRUint16 back[2]; 1.1304 + 1.1305 + (void)memset(utf8, 0, sizeof(utf8)); 1.1306 + 1.1307 + result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 1.1308 + (unsigned char *)&e->w[0], sizeof(e->w), utf8, sizeof(utf8), &len); 1.1309 + 1.1310 + if( !result ) { 1.1311 + fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8\n", 1.1312 + e->w[0], e->w[1]); 1.1313 + rv = PR_FALSE; 1.1314 + continue; 1.1315 + } 1.1316 + 1.1317 + result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, 1.1318 + utf8, len, (unsigned char *)&back32, sizeof(back32), &len); 1.1319 + 1.1320 + if( 4 != len ) { 1.1321 + fprintf(stdout, "Failed to convert UTF-16 0x%04.4x 0x%04.4x to UTF-8: " 1.1322 + "unexpected len %d\n", e->w[0], e->w[1], len); 1.1323 + rv = PR_FALSE; 1.1324 + continue; 1.1325 + } 1.1326 + 1.1327 + utf8[len] = '\0'; /* null-terminate for printing */ 1.1328 + 1.1329 + if( !result ) { 1.1330 + dump_utf8("Failed to convert UTF-8", utf8, "to UCS-4 (utf-16 test)\n"); 1.1331 + rv = PR_FALSE; 1.1332 + continue; 1.1333 + } 1.1334 + 1.1335 + if( (sizeof(back32) != len) || (e->c != back32) ) { 1.1336 + fprintf(stdout, "Wrong conversion of UTF-16 0x%04.4x 0x%04.4x ", 1.1337 + e->w[0], e->w[1]); 1.1338 + dump_utf8("to UTF-8", utf8, "and then to UCS-4: "); 1.1339 + if( sizeof(back32) != len ) { 1.1340 + fprintf(stdout, "len is %d\n", len); 1.1341 + } else { 1.1342 + fprintf(stdout, "expected 0x%08.8x, received 0x%08.8x\n", e->c, back32); 1.1343 + } 1.1344 + rv = PR_FALSE; 1.1345 + continue; 1.1346 + } 1.1347 + 1.1348 + (void)memset(utf8, 0, sizeof(utf8)); 1.1349 + back[0] = back[1] = 0; 1.1350 + 1.1351 + result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 1.1352 + (unsigned char *)&e->c, sizeof(e->c), utf8, sizeof(utf8), &len); 1.1353 + 1.1354 + if( !result ) { 1.1355 + fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8 (utf-16 test)\n", 1.1356 + e->c); 1.1357 + rv = PR_FALSE; 1.1358 + continue; 1.1359 + } 1.1360 + 1.1361 + result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, 1.1362 + utf8, len, (unsigned char *)&back[0], sizeof(back), &len); 1.1363 + 1.1364 + if( 4 != len ) { 1.1365 + fprintf(stdout, "Failed to convert UCS-4 0x%08.8x to UTF-8: " 1.1366 + "unexpected len %d\n", e->c, len); 1.1367 + rv = PR_FALSE; 1.1368 + continue; 1.1369 + } 1.1370 + 1.1371 + utf8[len] = '\0'; /* null-terminate for printing */ 1.1372 + 1.1373 + if( !result ) { 1.1374 + dump_utf8("Failed to convert UTF-8", utf8, "to UTF-16\n"); 1.1375 + rv = PR_FALSE; 1.1376 + continue; 1.1377 + } 1.1378 + 1.1379 + if( (sizeof(back) != len) || (e->w[0] != back[0]) || (e->w[1] != back[1]) ) { 1.1380 + fprintf(stdout, "Wrong conversion of UCS-4 0x%08.8x to UTF-8", e->c); 1.1381 + dump_utf8("", utf8, "and then to UTF-16:"); 1.1382 + if( sizeof(back) != len ) { 1.1383 + fprintf(stdout, "len is %d\n", len); 1.1384 + } else { 1.1385 + fprintf(stdout, "expected 0x%04.4x 0x%04.4x, received 0x%04.4x 0x%04.4xx\n", 1.1386 + e->w[0], e->w[1], back[0], back[1]); 1.1387 + } 1.1388 + rv = PR_FALSE; 1.1389 + continue; 1.1390 + } 1.1391 + } 1.1392 + 1.1393 + return rv; 1.1394 +} 1.1395 + 1.1396 +static PRBool 1.1397 +test_utf8_bad_chars 1.1398 +( 1.1399 + void 1.1400 +) 1.1401 +{ 1.1402 + PRBool rv = PR_TRUE; 1.1403 + int i; 1.1404 + 1.1405 + for( i = 0; i < sizeof(utf8_bad)/sizeof(utf8_bad[0]); i++ ) { 1.1406 + PRBool result; 1.1407 + unsigned char destbuf[30]; 1.1408 + unsigned int len = 0; 1.1409 + 1.1410 + result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, 1.1411 + (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len); 1.1412 + 1.1413 + if( result ) { 1.1414 + dump_utf8("Failed to detect bad UTF-8 string converting to UCS2: ", utf8_bad[i], "\n"); 1.1415 + rv = PR_FALSE; 1.1416 + continue; 1.1417 + } 1.1418 + result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, 1.1419 + (unsigned char *)utf8_bad[i], strlen(utf8_bad[i]), destbuf, sizeof(destbuf), &len); 1.1420 + 1.1421 + if( result ) { 1.1422 + dump_utf8("Failed to detect bad UTF-8 string converting to UCS4: ", utf8_bad[i], "\n"); 1.1423 + rv = PR_FALSE; 1.1424 + continue; 1.1425 + } 1.1426 + 1.1427 + } 1.1428 + 1.1429 + return rv; 1.1430 +} 1.1431 + 1.1432 +static PRBool 1.1433 +test_iso88591_chars 1.1434 +( 1.1435 + void 1.1436 +) 1.1437 +{ 1.1438 + PRBool rv = PR_TRUE; 1.1439 + int i; 1.1440 + 1.1441 + for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { 1.1442 + struct ucs2 *e = &ucs2[i]; 1.1443 + PRBool result; 1.1444 + unsigned char iso88591; 1.1445 + unsigned char utf8[3]; 1.1446 + unsigned int len = 0; 1.1447 + 1.1448 + if (ntohs(e->c) > 0xFF) continue; 1.1449 + 1.1450 + (void)memset(utf8, 0, sizeof(utf8)); 1.1451 + iso88591 = ntohs(e->c); 1.1452 + 1.1453 + result = sec_port_iso88591_utf8_conversion_function(&iso88591, 1.1454 + 1, utf8, sizeof(utf8), &len); 1.1455 + 1.1456 + if( !result ) { 1.1457 + fprintf(stdout, "Failed to convert ISO-8859-1 0x%02.2x to UTF-8\n", iso88591); 1.1458 + rv = PR_FALSE; 1.1459 + continue; 1.1460 + } 1.1461 + 1.1462 + if( (len >= sizeof(utf8)) || 1.1463 + (strlen(e->utf8) != len) || 1.1464 + (utf8[len] = '\0', 0 != strcmp(e->utf8, utf8)) ) { 1.1465 + fprintf(stdout, "Wrong conversion of ISO-8859-1 0x%02.2x to UTF-8: ", iso88591); 1.1466 + dump_utf8("expected", e->utf8, ", "); 1.1467 + dump_utf8("received", utf8, "\n"); 1.1468 + rv = PR_FALSE; 1.1469 + continue; 1.1470 + } 1.1471 + 1.1472 + } 1.1473 + 1.1474 + return rv; 1.1475 +} 1.1476 + 1.1477 +static PRBool 1.1478 +test_zeroes 1.1479 +( 1.1480 + void 1.1481 +) 1.1482 +{ 1.1483 + PRBool rv = PR_TRUE; 1.1484 + PRBool result; 1.1485 + PRUint32 lzero = 0; 1.1486 + PRUint16 szero = 0; 1.1487 + unsigned char utf8[8]; 1.1488 + unsigned int len = 0; 1.1489 + PRUint32 lback = 1; 1.1490 + PRUint16 sback = 1; 1.1491 + 1.1492 + (void)memset(utf8, 1, sizeof(utf8)); 1.1493 + 1.1494 + result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 1.1495 + (unsigned char *)&lzero, sizeof(lzero), utf8, sizeof(utf8), &len); 1.1496 + 1.1497 + if( !result ) { 1.1498 + fprintf(stdout, "Failed to convert UCS-4 0x00000000 to UTF-8\n"); 1.1499 + rv = PR_FALSE; 1.1500 + } else if( 1 != len ) { 1.1501 + fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: len = %d\n", len); 1.1502 + rv = PR_FALSE; 1.1503 + } else if( '\0' != *utf8 ) { 1.1504 + fprintf(stdout, "Wrong conversion of UCS-4 0x00000000: expected 00 ," 1.1505 + "received %02.2x\n", (unsigned int)*utf8); 1.1506 + rv = PR_FALSE; 1.1507 + } 1.1508 + 1.1509 + result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, 1.1510 + "", 1, (unsigned char *)&lback, sizeof(lback), &len); 1.1511 + 1.1512 + if( !result ) { 1.1513 + fprintf(stdout, "Failed to convert UTF-8 00 to UCS-4\n"); 1.1514 + rv = PR_FALSE; 1.1515 + } else if( 4 != len ) { 1.1516 + fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: len = %d\n", len); 1.1517 + rv = PR_FALSE; 1.1518 + } else if( 0 != lback ) { 1.1519 + fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-4: " 1.1520 + "expected 0x00000000, received 0x%08.8x\n", lback); 1.1521 + rv = PR_FALSE; 1.1522 + } 1.1523 + 1.1524 + (void)memset(utf8, 1, sizeof(utf8)); 1.1525 + 1.1526 + result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 1.1527 + (unsigned char *)&szero, sizeof(szero), utf8, sizeof(utf8), &len); 1.1528 + 1.1529 + if( !result ) { 1.1530 + fprintf(stdout, "Failed to convert UCS-2 0x0000 to UTF-8\n"); 1.1531 + rv = PR_FALSE; 1.1532 + } else if( 1 != len ) { 1.1533 + fprintf(stdout, "Wrong conversion of UCS-2 0x0000: len = %d\n", len); 1.1534 + rv = PR_FALSE; 1.1535 + } else if( '\0' != *utf8 ) { 1.1536 + fprintf(stdout, "Wrong conversion of UCS-2 0x0000: expected 00 ," 1.1537 + "received %02.2x\n", (unsigned int)*utf8); 1.1538 + rv = PR_FALSE; 1.1539 + } 1.1540 + 1.1541 + result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, 1.1542 + "", 1, (unsigned char *)&sback, sizeof(sback), &len); 1.1543 + 1.1544 + if( !result ) { 1.1545 + fprintf(stdout, "Failed to convert UTF-8 00 to UCS-2\n"); 1.1546 + rv = PR_FALSE; 1.1547 + } else if( 2 != len ) { 1.1548 + fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: len = %d\n", len); 1.1549 + rv = PR_FALSE; 1.1550 + } else if( 0 != sback ) { 1.1551 + fprintf(stdout, "Wrong conversion of UTF-8 00 to UCS-2: " 1.1552 + "expected 0x0000, received 0x%04.4x\n", sback); 1.1553 + rv = PR_FALSE; 1.1554 + } 1.1555 + 1.1556 + return rv; 1.1557 +} 1.1558 + 1.1559 +static PRBool 1.1560 +test_multichars 1.1561 +( 1.1562 + void 1.1563 +) 1.1564 +{ 1.1565 + int i; 1.1566 + unsigned int len, lenout; 1.1567 + PRUint32 *ucs4s; 1.1568 + char *ucs4_utf8; 1.1569 + PRUint16 *ucs2s; 1.1570 + char *ucs2_utf8; 1.1571 + void *tmp; 1.1572 + PRBool result; 1.1573 + 1.1574 + ucs4s = (PRUint32 *)calloc(sizeof(ucs4)/sizeof(ucs4[0]), sizeof(PRUint32)); 1.1575 + ucs2s = (PRUint16 *)calloc(sizeof(ucs2)/sizeof(ucs2[0]), sizeof(PRUint16)); 1.1576 + 1.1577 + if( ((PRUint32 *)NULL == ucs4s) || ((PRUint16 *)NULL == ucs2s) ) { 1.1578 + fprintf(stderr, "out of memory\n"); 1.1579 + exit(1); 1.1580 + } 1.1581 + 1.1582 + len = 0; 1.1583 + for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { 1.1584 + ucs4s[i] = ucs4[i].c; 1.1585 + len += strlen(ucs4[i].utf8); 1.1586 + } 1.1587 + 1.1588 + ucs4_utf8 = (char *)malloc(len); 1.1589 + 1.1590 + len = 0; 1.1591 + for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { 1.1592 + ucs2s[i] = ucs2[i].c; 1.1593 + len += strlen(ucs2[i].utf8); 1.1594 + } 1.1595 + 1.1596 + ucs2_utf8 = (char *)malloc(len); 1.1597 + 1.1598 + if( ((char *)NULL == ucs4_utf8) || ((char *)NULL == ucs2_utf8) ) { 1.1599 + fprintf(stderr, "out of memory\n"); 1.1600 + exit(1); 1.1601 + } 1.1602 + 1.1603 + *ucs4_utf8 = '\0'; 1.1604 + for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { 1.1605 + strcat(ucs4_utf8, ucs4[i].utf8); 1.1606 + } 1.1607 + 1.1608 + *ucs2_utf8 = '\0'; 1.1609 + for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { 1.1610 + strcat(ucs2_utf8, ucs2[i].utf8); 1.1611 + } 1.1612 + 1.1613 + /* UTF-8 -> UCS-4 */ 1.1614 + len = sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32); 1.1615 + tmp = calloc(len, 1); 1.1616 + if( (void *)NULL == tmp ) { 1.1617 + fprintf(stderr, "out of memory\n"); 1.1618 + exit(1); 1.1619 + } 1.1620 + 1.1621 + result = sec_port_ucs4_utf8_conversion_function(PR_TRUE, 1.1622 + ucs4_utf8, strlen(ucs4_utf8), tmp, len, &lenout); 1.1623 + if( !result ) { 1.1624 + fprintf(stdout, "Failed to convert much UTF-8 to UCS-4\n"); 1.1625 + goto done; 1.1626 + } 1.1627 + 1.1628 + if( lenout != len ) { 1.1629 + fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-4\n"); 1.1630 + goto loser; 1.1631 + } 1.1632 + 1.1633 + if( 0 != memcmp(ucs4s, tmp, len) ) { 1.1634 + fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-4\n"); 1.1635 + goto loser; 1.1636 + } 1.1637 + 1.1638 + free(tmp); tmp = (void *)NULL; 1.1639 + 1.1640 + /* UCS-4 -> UTF-8 */ 1.1641 + len = strlen(ucs4_utf8); 1.1642 + tmp = calloc(len, 1); 1.1643 + if( (void *)NULL == tmp ) { 1.1644 + fprintf(stderr, "out of memory\n"); 1.1645 + exit(1); 1.1646 + } 1.1647 + 1.1648 + result = sec_port_ucs4_utf8_conversion_function(PR_FALSE, 1.1649 + (unsigned char *)ucs4s, sizeof(ucs4)/sizeof(ucs4[0]) * sizeof(PRUint32), 1.1650 + tmp, len, &lenout); 1.1651 + if( !result ) { 1.1652 + fprintf(stdout, "Failed to convert much UCS-4 to UTF-8\n"); 1.1653 + goto done; 1.1654 + } 1.1655 + 1.1656 + if( lenout != len ) { 1.1657 + fprintf(stdout, "Unexpected length converting much UCS-4 to UTF-8\n"); 1.1658 + goto loser; 1.1659 + } 1.1660 + 1.1661 + if( 0 != strncmp(ucs4_utf8, tmp, len) ) { 1.1662 + fprintf(stdout, "Wrong conversion of much UCS-4 to UTF-8\n"); 1.1663 + goto loser; 1.1664 + } 1.1665 + 1.1666 + free(tmp); tmp = (void *)NULL; 1.1667 + 1.1668 + /* UTF-8 -> UCS-2 */ 1.1669 + len = sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16); 1.1670 + tmp = calloc(len, 1); 1.1671 + if( (void *)NULL == tmp ) { 1.1672 + fprintf(stderr, "out of memory\n"); 1.1673 + exit(1); 1.1674 + } 1.1675 + 1.1676 + result = sec_port_ucs2_utf8_conversion_function(PR_TRUE, 1.1677 + ucs2_utf8, strlen(ucs2_utf8), tmp, len, &lenout); 1.1678 + if( !result ) { 1.1679 + fprintf(stdout, "Failed to convert much UTF-8 to UCS-2\n"); 1.1680 + goto done; 1.1681 + } 1.1682 + 1.1683 + if( lenout != len ) { 1.1684 + fprintf(stdout, "Unexpected length converting much UTF-8 to UCS-2\n"); 1.1685 + goto loser; 1.1686 + } 1.1687 + 1.1688 + if( 0 != memcmp(ucs2s, tmp, len) ) { 1.1689 + fprintf(stdout, "Wrong conversion of much UTF-8 to UCS-2\n"); 1.1690 + goto loser; 1.1691 + } 1.1692 + 1.1693 + free(tmp); tmp = (void *)NULL; 1.1694 + 1.1695 + /* UCS-2 -> UTF-8 */ 1.1696 + len = strlen(ucs2_utf8); 1.1697 + tmp = calloc(len, 1); 1.1698 + if( (void *)NULL == tmp ) { 1.1699 + fprintf(stderr, "out of memory\n"); 1.1700 + exit(1); 1.1701 + } 1.1702 + 1.1703 + result = sec_port_ucs2_utf8_conversion_function(PR_FALSE, 1.1704 + (unsigned char *)ucs2s, sizeof(ucs2)/sizeof(ucs2[0]) * sizeof(PRUint16), 1.1705 + tmp, len, &lenout); 1.1706 + if( !result ) { 1.1707 + fprintf(stdout, "Failed to convert much UCS-2 to UTF-8\n"); 1.1708 + goto done; 1.1709 + } 1.1710 + 1.1711 + if( lenout != len ) { 1.1712 + fprintf(stdout, "Unexpected length converting much UCS-2 to UTF-8\n"); 1.1713 + goto loser; 1.1714 + } 1.1715 + 1.1716 + if( 0 != strncmp(ucs2_utf8, tmp, len) ) { 1.1717 + fprintf(stdout, "Wrong conversion of much UCS-2 to UTF-8\n"); 1.1718 + goto loser; 1.1719 + } 1.1720 + 1.1721 + /* implement UTF16 */ 1.1722 + 1.1723 + result = PR_TRUE; 1.1724 + goto done; 1.1725 + 1.1726 + loser: 1.1727 + result = PR_FALSE; 1.1728 + done: 1.1729 + free(ucs4s); 1.1730 + free(ucs4_utf8); 1.1731 + free(ucs2s); 1.1732 + free(ucs2_utf8); 1.1733 + if( (void *)NULL != tmp ) free(tmp); 1.1734 + return result; 1.1735 +} 1.1736 + 1.1737 +void 1.1738 +byte_order 1.1739 +( 1.1740 + void 1.1741 +) 1.1742 +{ 1.1743 + /* 1.1744 + * The implementation (now) expects the 16- and 32-bit characters 1.1745 + * to be in network byte order, not host byte order. Therefore I 1.1746 + * have to byteswap all those test vectors above. hton[ls] may be 1.1747 + * functions, so I have to do this dynamically. If you want to 1.1748 + * use this code to do host byte order conversions, just remove 1.1749 + * the call in main() to this function. 1.1750 + */ 1.1751 + 1.1752 + int i; 1.1753 + 1.1754 + for( i = 0; i < sizeof(ucs4)/sizeof(ucs4[0]); i++ ) { 1.1755 + struct ucs4 *e = &ucs4[i]; 1.1756 + e->c = htonl(e->c); 1.1757 + } 1.1758 + 1.1759 + for( i = 0; i < sizeof(ucs2)/sizeof(ucs2[0]); i++ ) { 1.1760 + struct ucs2 *e = &ucs2[i]; 1.1761 + e->c = htons(e->c); 1.1762 + } 1.1763 + 1.1764 + for( i = 0; i < sizeof(utf16)/sizeof(utf16[0]); i++ ) { 1.1765 + struct utf16 *e = &utf16[i]; 1.1766 + e->c = htonl(e->c); 1.1767 + e->w[0] = htons(e->w[0]); 1.1768 + e->w[1] = htons(e->w[1]); 1.1769 + } 1.1770 + 1.1771 + return; 1.1772 +} 1.1773 + 1.1774 +int 1.1775 +main 1.1776 +( 1.1777 + int argc, 1.1778 + char *argv[] 1.1779 +) 1.1780 +{ 1.1781 + byte_order(); 1.1782 + 1.1783 + if( test_ucs4_chars() && 1.1784 + test_ucs2_chars() && 1.1785 + test_utf16_chars() && 1.1786 + test_utf8_bad_chars() && 1.1787 + test_iso88591_chars() && 1.1788 + test_zeroes() && 1.1789 + test_multichars() && 1.1790 + PR_TRUE ) { 1.1791 + fprintf(stderr, "PASS\n"); 1.1792 + return 1; 1.1793 + } else { 1.1794 + fprintf(stderr, "FAIL\n"); 1.1795 + return 0; 1.1796 + } 1.1797 +} 1.1798 + 1.1799 +#endif /* TEST_UTF8 */