1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/util/uscan.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,759 @@ 1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 +#include "unicpriv.h" 1.9 +#define CHK_GR94(b) ( (uint8_t) 0xa0 < (uint8_t) (b) && (uint8_t) (b) < (uint8_t) 0xff ) 1.10 +#define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2)) 1.11 +/*================================================================================= 1.12 + 1.13 +=================================================================================*/ 1.14 +typedef int (*uSubScannerFunc) (unsigned char* in, uint16_t* out); 1.15 +/*================================================================================= 1.16 + 1.17 +=================================================================================*/ 1.18 + 1.19 +typedef int (*uScannerFunc) ( 1.20 + int32_t* state, 1.21 + unsigned char *in, 1.22 + uint16_t *out, 1.23 + uint32_t inbuflen, 1.24 + uint32_t* inscanlen 1.25 + ); 1.26 + 1.27 +int uScan( 1.28 + uScanClassID scanClass, 1.29 + int32_t* state, 1.30 + unsigned char *in, 1.31 + uint16_t *out, 1.32 + uint32_t inbuflen, 1.33 + uint32_t* inscanlen 1.34 + ); 1.35 + 1.36 +#define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out)) 1.37 + 1.38 +int uCheckAndScanAlways1Byte( 1.39 + int32_t* state, 1.40 + unsigned char *in, 1.41 + uint16_t *out, 1.42 + uint32_t inbuflen, 1.43 + uint32_t* inscanlen 1.44 + ); 1.45 +int uCheckAndScanAlways2Byte( 1.46 + int32_t* state, 1.47 + unsigned char *in, 1.48 + uint16_t *out, 1.49 + uint32_t inbuflen, 1.50 + uint32_t* inscanlen 1.51 + ); 1.52 +int uCheckAndScanAlways2ByteShiftGR( 1.53 + int32_t* state, 1.54 + unsigned char *in, 1.55 + uint16_t *out, 1.56 + uint32_t inbuflen, 1.57 + uint32_t* inscanlen 1.58 + ); 1.59 +int uCheckAndScanAlways2ByteGR128( 1.60 + int32_t* state, 1.61 + unsigned char *in, 1.62 + uint16_t *out, 1.63 + uint32_t inbuflen, 1.64 + uint32_t* inscanlen 1.65 + ); 1.66 +int uScanShift( 1.67 + uShiftInTable *shift, 1.68 + int32_t* state, 1.69 + unsigned char *in, 1.70 + uint16_t *out, 1.71 + uint32_t inbuflen, 1.72 + uint32_t* inscanlen 1.73 + ); 1.74 + 1.75 +int uCheckAndScan2ByteGRPrefix8F( 1.76 + int32_t* state, 1.77 + unsigned char *in, 1.78 + uint16_t *out, 1.79 + uint32_t inbuflen, 1.80 + uint32_t* inscanlen 1.81 + ); 1.82 +int uCheckAndScan2ByteGRPrefix8EA2( 1.83 + int32_t* state, 1.84 + unsigned char *in, 1.85 + uint16_t *out, 1.86 + uint32_t inbuflen, 1.87 + uint32_t* inscanlen 1.88 + ); 1.89 +int uCheckAndScan2ByteGRPrefix8EA3( 1.90 + int32_t* state, 1.91 + unsigned char *in, 1.92 + uint16_t *out, 1.93 + uint32_t inbuflen, 1.94 + uint32_t* inscanlen 1.95 + ); 1.96 +int uCheckAndScan2ByteGRPrefix8EA4( 1.97 + int32_t* state, 1.98 + unsigned char *in, 1.99 + uint16_t *out, 1.100 + uint32_t inbuflen, 1.101 + uint32_t* inscanlen 1.102 + ); 1.103 +int uCheckAndScan2ByteGRPrefix8EA5( 1.104 + int32_t* state, 1.105 + unsigned char *in, 1.106 + uint16_t *out, 1.107 + uint32_t inbuflen, 1.108 + uint32_t* inscanlen 1.109 + ); 1.110 +int uCheckAndScan2ByteGRPrefix8EA6( 1.111 + int32_t* state, 1.112 + unsigned char *in, 1.113 + uint16_t *out, 1.114 + uint32_t inbuflen, 1.115 + uint32_t* inscanlen 1.116 + ); 1.117 +int uCheckAndScan2ByteGRPrefix8EA7( 1.118 + int32_t* state, 1.119 + unsigned char *in, 1.120 + uint16_t *out, 1.121 + uint32_t inbuflen, 1.122 + uint32_t* inscanlen 1.123 + ); 1.124 +int uCnSAlways8BytesDecomposedHangul( 1.125 + int32_t* state, 1.126 + unsigned char *in, 1.127 + uint16_t *out, 1.128 + uint32_t inbuflen, 1.129 + uint32_t* inscanlen 1.130 + ); 1.131 +int uCheckAndScanJohabHangul( 1.132 + int32_t* state, 1.133 + unsigned char *in, 1.134 + uint16_t *out, 1.135 + uint32_t inbuflen, 1.136 + uint32_t* inscanlen 1.137 + ); 1.138 +int uCheckAndScanJohabSymbol( 1.139 + int32_t* state, 1.140 + unsigned char *in, 1.141 + uint16_t *out, 1.142 + uint32_t inbuflen, 1.143 + uint32_t* inscanlen 1.144 + ); 1.145 + 1.146 +int uCheckAndScan4BytesGB18030( 1.147 + int32_t* state, 1.148 + unsigned char *in, 1.149 + uint16_t *out, 1.150 + uint32_t inbuflen, 1.151 + uint32_t* inscanlen 1.152 + ); 1.153 + 1.154 +int uScanAlways2Byte( 1.155 + unsigned char* in, 1.156 + uint16_t* out 1.157 + ); 1.158 +int uScanAlways2ByteShiftGR( 1.159 + unsigned char* in, 1.160 + uint16_t* out 1.161 + ); 1.162 +int uScanAlways1Byte( 1.163 + unsigned char* in, 1.164 + uint16_t* out 1.165 + ); 1.166 +int uScanAlways1BytePrefix8E( 1.167 + unsigned char* in, 1.168 + uint16_t* out 1.169 + ); 1.170 +/*================================================================================= 1.171 + 1.172 +=================================================================================*/ 1.173 +const uScannerFunc m_scanner[uNumOfCharsetType] = 1.174 +{ 1.175 + uCheckAndScanAlways1Byte, 1.176 + uCheckAndScanAlways2Byte, 1.177 + uCheckAndScanAlways2ByteShiftGR, 1.178 + uCheckAndScan2ByteGRPrefix8F, 1.179 + uCheckAndScan2ByteGRPrefix8EA2, 1.180 + uCheckAndScan2ByteGRPrefix8EA3, 1.181 + uCheckAndScan2ByteGRPrefix8EA4, 1.182 + uCheckAndScan2ByteGRPrefix8EA5, 1.183 + uCheckAndScan2ByteGRPrefix8EA6, 1.184 + uCheckAndScan2ByteGRPrefix8EA7, 1.185 + uCnSAlways8BytesDecomposedHangul, 1.186 + uCheckAndScanJohabHangul, 1.187 + uCheckAndScanJohabSymbol, 1.188 + uCheckAndScan4BytesGB18030, 1.189 + uCheckAndScanAlways2ByteGR128 1.190 +}; 1.191 + 1.192 +/*================================================================================= 1.193 + 1.194 +=================================================================================*/ 1.195 + 1.196 +const uSubScannerFunc m_subscanner[uNumOfCharType] = 1.197 +{ 1.198 + uScanAlways1Byte, 1.199 + uScanAlways2Byte, 1.200 + uScanAlways2ByteShiftGR, 1.201 + uScanAlways1BytePrefix8E 1.202 +}; 1.203 +/*================================================================================= 1.204 + 1.205 +=================================================================================*/ 1.206 +int uScan( 1.207 + uScanClassID scanClass, 1.208 + int32_t* state, 1.209 + unsigned char *in, 1.210 + uint16_t *out, 1.211 + uint32_t inbuflen, 1.212 + uint32_t* inscanlen 1.213 + ) 1.214 +{ 1.215 + return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen); 1.216 +} 1.217 +/*================================================================================= 1.218 + 1.219 +=================================================================================*/ 1.220 +int uScanAlways1Byte( 1.221 + unsigned char* in, 1.222 + uint16_t* out 1.223 + ) 1.224 +{ 1.225 + *out = (uint16_t) in[0]; 1.226 + return 1; 1.227 +} 1.228 + 1.229 +/*================================================================================= 1.230 + 1.231 +=================================================================================*/ 1.232 +int uScanAlways2Byte( 1.233 + unsigned char* in, 1.234 + uint16_t* out 1.235 + ) 1.236 +{ 1.237 + *out = (uint16_t) (( in[0] << 8) | (in[1])); 1.238 + return 1; 1.239 +} 1.240 +/*================================================================================= 1.241 + 1.242 +=================================================================================*/ 1.243 +int uScanAlways2ByteShiftGR( 1.244 + unsigned char* in, 1.245 + uint16_t* out 1.246 + ) 1.247 +{ 1.248 + *out = (uint16_t) ((( in[0] << 8) | (in[1])) & 0x7F7F); 1.249 + return 1; 1.250 +} 1.251 + 1.252 +/*================================================================================= 1.253 + 1.254 +=================================================================================*/ 1.255 +int uScanAlways1BytePrefix8E( 1.256 + unsigned char* in, 1.257 + uint16_t* out 1.258 + ) 1.259 +{ 1.260 + *out = (uint16_t) in[1]; 1.261 + return 1; 1.262 +} 1.263 +/*================================================================================= 1.264 + 1.265 +=================================================================================*/ 1.266 +int uCheckAndScanAlways1Byte( 1.267 + int32_t* state, 1.268 + unsigned char *in, 1.269 + uint16_t *out, 1.270 + uint32_t inbuflen, 1.271 + uint32_t* inscanlen 1.272 + ) 1.273 +{ 1.274 + /* Don't check inlen. The caller should ensure it is larger than 0 */ 1.275 + *inscanlen = 1; 1.276 + *out = (uint16_t) in[0]; 1.277 + 1.278 + return 1; 1.279 +} 1.280 + 1.281 +/*================================================================================= 1.282 + 1.283 +=================================================================================*/ 1.284 +int uCheckAndScanAlways2Byte( 1.285 + int32_t* state, 1.286 + unsigned char *in, 1.287 + uint16_t *out, 1.288 + uint32_t inbuflen, 1.289 + uint32_t* inscanlen 1.290 + ) 1.291 +{ 1.292 + if(inbuflen < 2) 1.293 + return 0; 1.294 + else 1.295 + { 1.296 + *inscanlen = 2; 1.297 + *out = ((in[0] << 8) | ( in[1])) ; 1.298 + return 1; 1.299 + } 1.300 +} 1.301 +/*================================================================================= 1.302 + 1.303 +=================================================================================*/ 1.304 +int uCheckAndScanAlways2ByteShiftGR( 1.305 + int32_t* state, 1.306 + unsigned char *in, 1.307 + uint16_t *out, 1.308 + uint32_t inbuflen, 1.309 + uint32_t* inscanlen 1.310 + ) 1.311 +{ 1.312 + /* 1.313 + * Both bytes should be in the range of [0xa1,0xfe] for 94x94 character sets 1.314 + * invoked on GR. No encoding implemented in Mozilla uses 96x96 char. sets. 1.315 + * Only 2nd byte range needs to be checked because 1.316 + * 1st byte is checked before calling this in nsUnicodeDecoerHelper.cpp 1.317 + */ 1.318 + if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */ 1.319 + return 0; 1.320 + else if (! CHK_GR94(in[1])) 1.321 + { 1.322 + *inscanlen = 2; 1.323 + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ 1.324 + return 1; 1.325 + } 1.326 + else 1.327 + { 1.328 + *inscanlen = 2; 1.329 + *out = (((in[0] << 8) | ( in[1])) & 0x7F7F); 1.330 + return 1; 1.331 + } 1.332 +} 1.333 +/*================================================================================= 1.334 + 1.335 +=================================================================================*/ 1.336 +int uCheckAndScanAlways2ByteGR128( 1.337 + int32_t* state, 1.338 + unsigned char *in, 1.339 + uint16_t *out, 1.340 + uint32_t inbuflen, 1.341 + uint32_t* inscanlen 1.342 + ) 1.343 +{ 1.344 + /* 1.345 + * The first byte should be in [0xa1,0xfe] 1.346 + * and the second byte in [0x41,0xfe] 1.347 + * Used by CP949 -> Unicode converter. 1.348 + * Only 2nd byte range needs to be checked because 1.349 + * 1st byte is checked before calling this in nsUnicodeDecoderHelper.cpp 1.350 + */ 1.351 + if(inbuflen < 2) /* will lead to NS_OK_UDEC_MOREINPUT */ 1.352 + return 0; 1.353 + else if (in[1] < 0x41) /* 2nd byte range check */ 1.354 + { 1.355 + *inscanlen = 2; 1.356 + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ 1.357 + return 1; 1.358 + } 1.359 + else 1.360 + { 1.361 + *inscanlen = 2; 1.362 + *out = (in[0] << 8) | in[1]; 1.363 + return 1; 1.364 + } 1.365 +} 1.366 +/*================================================================================= 1.367 + 1.368 +=================================================================================*/ 1.369 +int uScanShift( 1.370 + uShiftInTable *shift, 1.371 + int32_t* state, 1.372 + unsigned char *in, 1.373 + uint16_t *out, 1.374 + uint32_t inbuflen, 1.375 + uint32_t* inscanlen 1.376 + ) 1.377 +{ 1.378 + int16_t i; 1.379 + const uShiftInCell* cell = &(shift->shiftcell[0]); 1.380 + int16_t itemnum = shift->numOfItem; 1.381 + for(i=0;i<itemnum;i++) 1.382 + { 1.383 + if( ( in[0] >= cell[i].shiftin_Min) && 1.384 + ( in[0] <= cell[i].shiftin_Max)) 1.385 + { 1.386 + if(inbuflen < cell[i].reserveLen) 1.387 + return 0; 1.388 + else 1.389 + { 1.390 + *inscanlen = cell[i].reserveLen; 1.391 + return (uSubScanner(cell[i].classID,in,out)); 1.392 + } 1.393 + } 1.394 + } 1.395 + return 0; 1.396 +} 1.397 +/*================================================================================= 1.398 + 1.399 +=================================================================================*/ 1.400 +int uCheckAndScan2ByteGRPrefix8F( 1.401 + int32_t* state, 1.402 + unsigned char *in, 1.403 + uint16_t *out, 1.404 + uint32_t inbuflen, 1.405 + uint32_t* inscanlen 1.406 + ) 1.407 +{ 1.408 + if((inbuflen < 3) ||(in[0] != 0x8F)) 1.409 + return 0; 1.410 + else if (! CHK_GR94(in[1])) /* 2nd byte range check */ 1.411 + { 1.412 + *inscanlen = 2; 1.413 + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ 1.414 + return 1; 1.415 + } 1.416 + else if (! CHK_GR94(in[2])) /* 3rd byte range check */ 1.417 + { 1.418 + *inscanlen = 3; 1.419 + *out = 0xFF; /* for 2-byte table, uMap() is guaranteed to fail for 0xFF. */ 1.420 + return 1; 1.421 + } 1.422 + else 1.423 + { 1.424 + *inscanlen = 3; 1.425 + *out = (((in[1] << 8) | ( in[2])) & 0x7F7F); 1.426 + return 1; 1.427 + } 1.428 +} 1.429 +/*================================================================================= 1.430 + 1.431 +=================================================================================*/ 1.432 + 1.433 +/* Macro definition to use for uCheckAndScan2ByteGRPrefix8EAX() 1.434 + * where X is 2,3,4,5,6,7 1.435 + */ 1.436 +#define CNS_8EAX_4BYTE(PREFIX) \ 1.437 + if((inbuflen < 4) || (in[0] != 0x8E)) \ 1.438 + return 0; \ 1.439 + else if((in[1] != (PREFIX))) \ 1.440 + { \ 1.441 + *inscanlen = 2; \ 1.442 + *out = 0xFF; \ 1.443 + return 1; \ 1.444 + } \ 1.445 + else if(! CHK_GR94(in[2])) \ 1.446 + { \ 1.447 + *inscanlen = 3; \ 1.448 + *out = 0xFF; \ 1.449 + return 1; \ 1.450 + } \ 1.451 + else if(! CHK_GR94(in[3])) \ 1.452 + { \ 1.453 + *inscanlen = 4; \ 1.454 + *out = 0xFF; \ 1.455 + return 1; \ 1.456 + } \ 1.457 + else \ 1.458 + { \ 1.459 + *inscanlen = 4; \ 1.460 + *out = (((in[2] << 8) | ( in[3])) & 0x7F7F); \ 1.461 + return 1; \ 1.462 + } 1.463 + 1.464 +int uCheckAndScan2ByteGRPrefix8EA2( 1.465 + int32_t* state, 1.466 + unsigned char *in, 1.467 + uint16_t *out, 1.468 + uint32_t inbuflen, 1.469 + uint32_t* inscanlen 1.470 + ) 1.471 +{ 1.472 + CNS_8EAX_4BYTE(0xA2) 1.473 +} 1.474 + 1.475 +/*================================================================================= 1.476 + 1.477 +=================================================================================*/ 1.478 +int uCheckAndScan2ByteGRPrefix8EA3( 1.479 + int32_t* state, 1.480 + unsigned char *in, 1.481 + uint16_t *out, 1.482 + uint32_t inbuflen, 1.483 + uint32_t* inscanlen 1.484 + ) 1.485 +{ 1.486 + CNS_8EAX_4BYTE(0xA3) 1.487 +} 1.488 +/*================================================================================= 1.489 + 1.490 +=================================================================================*/ 1.491 +int uCheckAndScan2ByteGRPrefix8EA4( 1.492 + int32_t* state, 1.493 + unsigned char *in, 1.494 + uint16_t *out, 1.495 + uint32_t inbuflen, 1.496 + uint32_t* inscanlen 1.497 + ) 1.498 +{ 1.499 + CNS_8EAX_4BYTE(0xA4) 1.500 +} 1.501 +/*================================================================================= 1.502 + 1.503 +=================================================================================*/ 1.504 +int uCheckAndScan2ByteGRPrefix8EA5( 1.505 + int32_t* state, 1.506 + unsigned char *in, 1.507 + uint16_t *out, 1.508 + uint32_t inbuflen, 1.509 + uint32_t* inscanlen 1.510 + ) 1.511 +{ 1.512 + CNS_8EAX_4BYTE(0xA5) 1.513 +} 1.514 +/*================================================================================= 1.515 + 1.516 +=================================================================================*/ 1.517 +int uCheckAndScan2ByteGRPrefix8EA6( 1.518 + int32_t* state, 1.519 + unsigned char *in, 1.520 + uint16_t *out, 1.521 + uint32_t inbuflen, 1.522 + uint32_t* inscanlen 1.523 + ) 1.524 +{ 1.525 + CNS_8EAX_4BYTE(0xA6) 1.526 +} 1.527 +/*================================================================================= 1.528 + 1.529 +=================================================================================*/ 1.530 +int uCheckAndScan2ByteGRPrefix8EA7( 1.531 + int32_t* state, 1.532 + unsigned char *in, 1.533 + uint16_t *out, 1.534 + uint32_t inbuflen, 1.535 + uint32_t* inscanlen 1.536 + ) 1.537 +{ 1.538 + CNS_8EAX_4BYTE(0xA7) 1.539 +} 1.540 +/*================================================================================= 1.541 + 1.542 +=================================================================================*/ 1.543 +#define SBase 0xAC00 1.544 +#define SCount 11172 1.545 +#define LCount 19 1.546 +#define VCount 21 1.547 +#define TCount 28 1.548 +#define NCount (VCount * TCount) 1.549 + 1.550 +int uCnSAlways8BytesDecomposedHangul( 1.551 + int32_t* state, 1.552 + unsigned char *in, 1.553 + uint16_t *out, 1.554 + uint32_t inbuflen, 1.555 + uint32_t* inscanlen 1.556 + ) 1.557 +{ 1.558 + 1.559 + uint16_t LIndex, VIndex, TIndex; 1.560 + /* no 8 bytes, not in a4 range, or the first 2 byte are not a4d4 */ 1.561 + if((inbuflen < 8) || (0xa4 != in[0]) || (0xd4 != in[1]) || 1.562 + (0xa4 != in[2] ) || (0xa4 != in[4]) || (0xa4 != in[6])) 1.563 + return 0; 1.564 + 1.565 + /* Compute LIndex */ 1.566 + if((in[3] < 0xa1) || (in[3] > 0xbe)) { /* illegal leading consonant */ 1.567 + return 0; 1.568 + } 1.569 + else { 1.570 + static const uint8_t lMap[] = { 1.571 + /* A1 A2 A3 A4 A5 A6 A7 */ 1.572 + 0, 1,0xff, 2,0xff,0xff, 3, 1.573 + /* A8 A9 AA AB AC AD AE AF */ 1.574 + 4, 5,0xff,0xff,0xff,0xff,0xff,0xff, 1.575 + /* B0 B1 B2 B3 B4 B5 B6 B7 */ 1.576 + 0xff, 6, 7, 8,0xff, 9, 10, 11, 1.577 + /* B8 B9 BA BB BC BD BE */ 1.578 + 12, 13, 14, 15, 16, 17, 18 1.579 + }; 1.580 + 1.581 + LIndex = lMap[in[3] - 0xa1]; 1.582 + if(0xff == (0xff & LIndex)) 1.583 + return 0; 1.584 + } 1.585 + 1.586 + /* Compute VIndex */ 1.587 + if((in[5] < 0xbf) || (in[5] > 0xd3)) { /* illegal medial vowel */ 1.588 + return 0; 1.589 + } 1.590 + else { 1.591 + VIndex = in[5] - 0xbf; 1.592 + } 1.593 + 1.594 + /* Compute TIndex */ 1.595 + if(0xd4 == in[7]) 1.596 + { 1.597 + TIndex = 0; 1.598 + } 1.599 + else if((in[7] < 0xa1) || (in[7] > 0xbe)) {/* illegal trailing consonant */ 1.600 + return 0; 1.601 + } 1.602 + else { 1.603 + static const uint8_t tMap[] = { 1.604 + /* A1 A2 A3 A4 A5 A6 A7 */ 1.605 + 1, 2, 3, 4, 5, 6, 7, 1.606 + /* A8 A9 AA AB AC AD AE AF */ 1.607 + 0xff, 8, 9, 10, 11, 12, 13, 14, 1.608 + /* B0 B1 B2 B3 B4 B5 B6 B7 */ 1.609 + 15, 16, 17,0xff, 18, 19, 20, 21, 1.610 + /* B8 B9 BA BB BC BD BE */ 1.611 + 22,0xff, 23, 24, 25, 26, 27 1.612 + }; 1.613 + TIndex = tMap[in[7] - 0xa1]; 1.614 + if(0xff == (0xff & TIndex)) 1.615 + return 0; 1.616 + } 1.617 + 1.618 + *inscanlen = 8; 1.619 + /* the following line is from Unicode 2.0 page 3-13 item 5 */ 1.620 + *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; 1.621 + 1.622 + return 1; 1.623 +} 1.624 +/*================================================================================= 1.625 + 1.626 +=================================================================================*/ 1.627 + 1.628 +int uCheckAndScanJohabHangul( 1.629 + int32_t* state, 1.630 + unsigned char *in, 1.631 + uint16_t *out, 1.632 + uint32_t inbuflen, 1.633 + uint32_t* inscanlen 1.634 + ) 1.635 +{ 1.636 +/* since we don't have code to convert Johab to Unicode right now * 1.637 + * make this part of code #if 0 to save space until we fully test it */ 1.638 + if(inbuflen < 2) 1.639 + return 0; 1.640 + else { 1.641 + /* 1.642 + * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183 1.643 + * of "CJKV Information Processing" for details 1.644 + */ 1.645 + static const uint8_t lMap[32]={ /* totaly 19 */ 1.646 + 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */ 1.647 + 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */ 1.648 + 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */ 1.649 + 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */ 1.650 + }; 1.651 + static const uint8_t vMap[32]={ /* totaly 21 */ 1.652 + 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */ 1.653 + 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */ 1.654 + 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */ 1.655 + 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */ 1.656 + }; 1.657 + static const uint8_t tMap[32]={ /* totaly 29 */ 1.658 + 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */ 1.659 + 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */ 1.660 + 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */ 1.661 + 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */ 1.662 + }; 1.663 + uint16_t ch = (in[0] << 8) | in[1]; 1.664 + uint16_t LIndex, VIndex, TIndex; 1.665 + if(0 == (0x8000 & ch)) 1.666 + return 0; 1.667 + LIndex=lMap[(ch>>10)& 0x1F]; 1.668 + VIndex=vMap[(ch>>5) & 0x1F]; 1.669 + TIndex=tMap[(ch>>0) & 0x1F]; 1.670 + if((0xff==(LIndex)) || 1.671 + (0xff==(VIndex)) || 1.672 + (0xff==(TIndex))) 1.673 + return 0; 1.674 + /* the following line is from Unicode 2.0 page 3-13 item 5 */ 1.675 + *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; 1.676 + *inscanlen = 2; 1.677 + return 1; 1.678 + } 1.679 +} 1.680 +int uCheckAndScanJohabSymbol( 1.681 + int32_t* state, 1.682 + unsigned char *in, 1.683 + uint16_t *out, 1.684 + uint32_t inbuflen, 1.685 + uint32_t* inscanlen 1.686 + ) 1.687 +{ 1.688 + if(inbuflen < 2) 1.689 + return 0; 1.690 + else { 1.691 + /* 1.692 + * The following code are based on the Perl code lised under 1.693 + * "Johab to ISO-2022-KR or EUC-KR Conversion" in page 1014 of 1.694 + * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com> 1.695 + * 1.696 + * sub johab2ks ($) { # Convert Johab to ISO-2022-KR 1.697 + * my @johab = unpack("C*", $_[0]); 1.698 + * my ($offset, $d8_off) = (0,0); 1.699 + * my @out = (); 1.700 + * while(($hi, $lo) = splice($johab, 0, 2)) { 1.701 + * $offset = 1 if ($hi > 223 and $hi < 250); 1.702 + * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42)); 1.703 + * push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) - 1.704 + * ($lo < 161 ? 1 : 0) + $offset) + $d8_off), 1.705 + * $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 )); 1.706 + * } 1.707 + * return pack ("C*", @out); 1.708 + * } 1.709 + * additional comments from Ken Lunde 1.710 + * $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42)); 1.711 + * has three possible return values: 1.712 + * 0 if $hi is not equal to 216 1.713 + * 94 if $hi is euqal to 216 and if $lo is greater than 160 1.714 + * 42 if $hi is euqal to 216 and if $lo is not greater than 160 1.715 + */ 1.716 + unsigned char hi = in[0]; 1.717 + unsigned char lo = in[1]; 1.718 + uint16_t offset = (( hi > 223 ) && ( hi < 250)) ? 1 : 0; 1.719 + uint16_t d8_off = 0; 1.720 + if(216 == hi) { 1.721 + if( lo > 160) 1.722 + d8_off = 94; 1.723 + else 1.724 + d8_off = 42; 1.725 + } 1.726 + 1.727 + *out = (((((hi - ((hi < 223) ? 200 : 187)) << 1) - 1.728 + (lo < 161 ? 1 : 0) + offset) + d8_off) << 8 ) | 1.729 + (lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : 1.730 + 128)); 1.731 + *inscanlen = 2; 1.732 + return 1; 1.733 + } 1.734 +} 1.735 +int uCheckAndScan4BytesGB18030( 1.736 + int32_t* state, 1.737 + unsigned char *in, 1.738 + uint16_t *out, 1.739 + uint32_t inbuflen, 1.740 + uint32_t* inscanlen 1.741 + ) 1.742 +{ 1.743 + uint32_t data; 1.744 + if(inbuflen < 4) 1.745 + return 0; 1.746 + 1.747 + if((in[0] < 0x81 ) || (0xfe < in[0])) 1.748 + return 0; 1.749 + if((in[1] < 0x30 ) || (0x39 < in[1])) 1.750 + return 0; 1.751 + if((in[2] < 0x81 ) || (0xfe < in[2])) 1.752 + return 0; 1.753 + if((in[3] < 0x30 ) || (0x39 < in[3])) 1.754 + return 0; 1.755 + 1.756 + data = (((((in[0] - 0x81) * 10 + (in[1] - 0x30)) * 126) + 1.757 + (in[2] - 0x81)) * 10 ) + (in[3] - 0x30); 1.758 + 1.759 + *inscanlen = 4; 1.760 + *out = (data < 0x00010000) ? data : 0xFFFD; 1.761 + return 1; 1.762 +}