1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/util/ugen.c Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,712 @@ 1.4 +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 +#include "unicpriv.h" 1.9 +/*================================================================================= 1.10 + 1.11 +=================================================================================*/ 1.12 +typedef int (*uSubGeneratorFunc) (uint16_t in, unsigned char* out); 1.13 +/*================================================================================= 1.14 + 1.15 +=================================================================================*/ 1.16 + 1.17 +typedef int (*uGeneratorFunc) ( 1.18 + int32_t* state, 1.19 + uint16_t in, 1.20 + unsigned char* out, 1.21 + uint32_t outbuflen, 1.22 + uint32_t* outlen 1.23 + ); 1.24 + 1.25 +int uGenerate( 1.26 + uScanClassID scanClass, 1.27 + int32_t* state, 1.28 + uint16_t in, 1.29 + unsigned char* out, 1.30 + uint32_t outbuflen, 1.31 + uint32_t* outlen 1.32 + ); 1.33 + 1.34 +#define uSubGenerator(sub,in,out) (* m_subgenerator[sub])((in),(out)) 1.35 + 1.36 +int uCheckAndGenAlways1Byte( 1.37 + int32_t* state, 1.38 + uint16_t in, 1.39 + unsigned char* out, 1.40 + uint32_t outbuflen, 1.41 + uint32_t* outlen 1.42 + ); 1.43 +int uCheckAndGenAlways2Byte( 1.44 + int32_t* state, 1.45 + uint16_t in, 1.46 + unsigned char* out, 1.47 + uint32_t outbuflen, 1.48 + uint32_t* outlen 1.49 + ); 1.50 +int uCheckAndGenAlways2ByteShiftGR( 1.51 + int32_t* state, 1.52 + uint16_t in, 1.53 + unsigned char* out, 1.54 + uint32_t outbuflen, 1.55 + uint32_t* outlen 1.56 + ); 1.57 +int uGenerateShift( 1.58 + uShiftOutTable *shift, 1.59 + int32_t* state, 1.60 + uint16_t in, 1.61 + unsigned char* out, 
1.62 + uint32_t outbuflen, 1.63 + uint32_t* outlen 1.64 + ); 1.65 +int uCheckAndGen2ByteGRPrefix8F( 1.66 + int32_t* state, 1.67 + uint16_t in, 1.68 + unsigned char* out, 1.69 + uint32_t outbuflen, 1.70 + uint32_t* outlen 1.71 + ); 1.72 +int uCheckAndGen2ByteGRPrefix8EA2( 1.73 + int32_t* state, 1.74 + uint16_t in, 1.75 + unsigned char* out, 1.76 + uint32_t outbuflen, 1.77 + uint32_t* outlen 1.78 + ); 1.79 + 1.80 +int uCheckAndGen2ByteGRPrefix8EA3( 1.81 + int32_t* state, 1.82 + uint16_t in, 1.83 + unsigned char* out, 1.84 + uint32_t outbuflen, 1.85 + uint32_t* outlen 1.86 + ); 1.87 + 1.88 +int uCheckAndGen2ByteGRPrefix8EA4( 1.89 + int32_t* state, 1.90 + uint16_t in, 1.91 + unsigned char* out, 1.92 + uint32_t outbuflen, 1.93 + uint32_t* outlen 1.94 + ); 1.95 + 1.96 +int uCheckAndGen2ByteGRPrefix8EA5( 1.97 + int32_t* state, 1.98 + uint16_t in, 1.99 + unsigned char* out, 1.100 + uint32_t outbuflen, 1.101 + uint32_t* outlen 1.102 + ); 1.103 + 1.104 +int uCheckAndGen2ByteGRPrefix8EA6( 1.105 + int32_t* state, 1.106 + uint16_t in, 1.107 + unsigned char* out, 1.108 + uint32_t outbuflen, 1.109 + uint32_t* outlen 1.110 + ); 1.111 + 1.112 +int uCheckAndGen2ByteGRPrefix8EA7( 1.113 + int32_t* state, 1.114 + uint16_t in, 1.115 + unsigned char* out, 1.116 + uint32_t outbuflen, 1.117 + uint32_t* outlen 1.118 + ); 1.119 +int uCnGAlways8BytesDecomposedHangul( 1.120 + int32_t* state, 1.121 + uint16_t in, 1.122 + unsigned char* out, 1.123 + uint32_t outbuflen, 1.124 + uint32_t* outlen 1.125 + ); 1.126 + 1.127 +int uCheckAndGenJohabHangul( 1.128 + int32_t* state, 1.129 + uint16_t in, 1.130 + unsigned char* out, 1.131 + uint32_t outbuflen, 1.132 + uint32_t* outlen 1.133 + ); 1.134 + 1.135 +int uCheckAndGenJohabSymbol( 1.136 + int32_t* state, 1.137 + uint16_t in, 1.138 + unsigned char* out, 1.139 + uint32_t outbuflen, 1.140 + uint32_t* outlen 1.141 + ); 1.142 + 1.143 + 1.144 +int uCheckAndGen4BytesGB18030( 1.145 + int32_t* state, 1.146 + uint16_t in, 1.147 + unsigned char* out, 1.148 + 
uint32_t outbuflen, 1.149 + uint32_t* outlen 1.150 + ); 1.151 + 1.152 +int uGenAlways2Byte( 1.153 + uint16_t in, 1.154 + unsigned char* out 1.155 + ); 1.156 +int uGenAlways2ByteShiftGR( 1.157 + uint16_t in, 1.158 + unsigned char* out 1.159 + ); 1.160 +int uGenAlways1Byte( 1.161 + uint16_t in, 1.162 + unsigned char* out 1.163 + ); 1.164 +int uGenAlways1BytePrefix8E( 1.165 + uint16_t in, 1.166 + unsigned char* out 1.167 + ); 1.168 +/*================================================================================= 1.169 + 1.170 +=================================================================================*/ 1.171 +const uGeneratorFunc m_generator[uNumOfCharsetType] = 1.172 +{ 1.173 + uCheckAndGenAlways1Byte, 1.174 + uCheckAndGenAlways2Byte, 1.175 + uCheckAndGenAlways2ByteShiftGR, 1.176 + uCheckAndGen2ByteGRPrefix8F, 1.177 + uCheckAndGen2ByteGRPrefix8EA2, 1.178 + uCheckAndGen2ByteGRPrefix8EA3, 1.179 + uCheckAndGen2ByteGRPrefix8EA4, 1.180 + uCheckAndGen2ByteGRPrefix8EA5, 1.181 + uCheckAndGen2ByteGRPrefix8EA6, 1.182 + uCheckAndGen2ByteGRPrefix8EA7, 1.183 + uCnGAlways8BytesDecomposedHangul, 1.184 + uCheckAndGenJohabHangul, 1.185 + uCheckAndGenJohabSymbol, 1.186 + uCheckAndGen4BytesGB18030, 1.187 + uCheckAndGenAlways2Byte /* place-holder for GR128 */ 1.188 +}; 1.189 + 1.190 +/*================================================================================= 1.191 + 1.192 +=================================================================================*/ 1.193 + 1.194 +const uSubGeneratorFunc m_subgenerator[uNumOfCharType] = 1.195 +{ 1.196 + uGenAlways1Byte, 1.197 + uGenAlways2Byte, 1.198 + uGenAlways2ByteShiftGR, 1.199 + uGenAlways1BytePrefix8E 1.200 +}; 1.201 +/*================================================================================= 1.202 + 1.203 +=================================================================================*/ 1.204 +int uGenerate( 1.205 + uScanClassID scanClass, 1.206 + int32_t* state, 1.207 + uint16_t in, 1.208 + unsigned char* out, 
1.209 + uint32_t outbuflen, 1.210 + uint32_t* outlen 1.211 + ) 1.212 +{ 1.213 + return (* m_generator[scanClass]) (state,in,out,outbuflen,outlen); 1.214 +} 1.215 +/*================================================================================= 1.216 + 1.217 +=================================================================================*/ 1.218 +int uGenAlways1Byte( 1.219 + uint16_t in, 1.220 + unsigned char* out 1.221 + ) 1.222 +{ 1.223 + out[0] = (unsigned char)in; 1.224 + return 1; 1.225 +} 1.226 + 1.227 +/*================================================================================= 1.228 + 1.229 +=================================================================================*/ 1.230 +int uGenAlways2Byte( 1.231 + uint16_t in, 1.232 + unsigned char* out 1.233 + ) 1.234 +{ 1.235 + out[0] = (unsigned char)((in >> 8) & 0xff); 1.236 + out[1] = (unsigned char)(in & 0xff); 1.237 + return 1; 1.238 +} 1.239 +/*================================================================================= 1.240 + 1.241 +=================================================================================*/ 1.242 +int uGenAlways2ByteShiftGR( 1.243 + uint16_t in, 1.244 + unsigned char* out 1.245 + ) 1.246 +{ 1.247 + out[0] = (unsigned char)(((in >> 8) & 0xff) | 0x80); 1.248 + out[1] = (unsigned char)((in & 0xff) | 0x80); 1.249 + return 1; 1.250 +} 1.251 +/*================================================================================= 1.252 + 1.253 +=================================================================================*/ 1.254 +int uGenAlways1BytePrefix8E( 1.255 + uint16_t in, 1.256 + unsigned char* out 1.257 + ) 1.258 +{ 1.259 + out[0] = 0x8E; 1.260 + out[1] = (unsigned char)(in & 0xff); 1.261 + return 1; 1.262 +} 1.263 +/*================================================================================= 1.264 + 1.265 +=================================================================================*/ 1.266 +int uCheckAndGenAlways1Byte( 1.267 + int32_t* state, 1.268 
+ uint16_t in, 1.269 + unsigned char* out, 1.270 + uint32_t outbuflen, 1.271 + uint32_t* outlen 1.272 + ) 1.273 +{ 1.274 + /* Don't check inlen. The caller should ensure it is larger than 0 */ 1.275 + /* Oops, I don't agree. Code changed to check every time. [CATA] */ 1.276 + if(outbuflen < 1) 1.277 + return 0; 1.278 + else 1.279 + { 1.280 + *outlen = 1; 1.281 + out[0] = in & 0xff; 1.282 + return 1; 1.283 + } 1.284 +} 1.285 + 1.286 +/*================================================================================= 1.287 + 1.288 +=================================================================================*/ 1.289 +int uCheckAndGenAlways2Byte( 1.290 + int32_t* state, 1.291 + uint16_t in, 1.292 + unsigned char* out, 1.293 + uint32_t outbuflen, 1.294 + uint32_t* outlen 1.295 + ) 1.296 +{ 1.297 + if(outbuflen < 2) 1.298 + return 0; 1.299 + else 1.300 + { 1.301 + *outlen = 2; 1.302 + out[0] = ((in >> 8 ) & 0xff); 1.303 + out[1] = in & 0xff; 1.304 + return 1; 1.305 + } 1.306 +} 1.307 +/*================================================================================= 1.308 + 1.309 +=================================================================================*/ 1.310 +int uCheckAndGenAlways2ByteShiftGR( 1.311 + int32_t* state, 1.312 + uint16_t in, 1.313 + unsigned char* out, 1.314 + uint32_t outbuflen, 1.315 + uint32_t* outlen 1.316 + ) 1.317 +{ 1.318 + if(outbuflen < 2) 1.319 + return 0; 1.320 + else 1.321 + { 1.322 + *outlen = 2; 1.323 + out[0] = ((in >> 8 ) & 0xff) | 0x80; 1.324 + out[1] = (in & 0xff) | 0x80; 1.325 + return 1; 1.326 + } 1.327 +} 1.328 +/*================================================================================= 1.329 + 1.330 +=================================================================================*/ 1.331 +int uGenerateShift( 1.332 + uShiftOutTable *shift, 1.333 + int32_t* state, 1.334 + uint16_t in, 1.335 + unsigned char* out, 1.336 + uint32_t outbuflen, 1.337 + uint32_t* outlen 1.338 + ) 1.339 +{ 1.340 + int16_t i; 1.341 + 
const uShiftOutCell* cell = &(shift->shiftcell[0]); 1.342 + int16_t itemnum = shift->numOfItem; 1.343 + unsigned char inH, inL; 1.344 + inH = (in >> 8) & 0xff; 1.345 + inL = (in & 0xff ); 1.346 + for(i=0;i<itemnum;i++) 1.347 + { 1.348 + if( ( inL >= cell[i].shiftout_MinLB) && 1.349 + ( inL <= cell[i].shiftout_MaxLB) && 1.350 + ( inH >= cell[i].shiftout_MinHB) && 1.351 + ( inH <= cell[i].shiftout_MaxHB) ) 1.352 + { 1.353 + if(outbuflen < cell[i].reserveLen) 1.354 + { 1.355 + return 0; 1.356 + } 1.357 + else 1.358 + { 1.359 + *outlen = cell[i].reserveLen; 1.360 + return (uSubGenerator(cell[i].classID,in,out)); 1.361 + } 1.362 + } 1.363 + } 1.364 + return 0; 1.365 +} 1.366 +/*================================================================================= 1.367 + 1.368 +=================================================================================*/ 1.369 +int uCheckAndGen2ByteGRPrefix8F(int32_t* state, 1.370 + uint16_t in, 1.371 + unsigned char* out, 1.372 + uint32_t outbuflen, 1.373 + uint32_t* outlen 1.374 + ) 1.375 +{ 1.376 + if(outbuflen < 3) 1.377 + return 0; 1.378 + else 1.379 + { 1.380 + *outlen = 3; 1.381 + out[0] = 0x8F; 1.382 + out[1] = ((in >> 8 ) & 0xff) | 0x80; 1.383 + out[2] = (in & 0xff) | 0x80; 1.384 + return 1; 1.385 + } 1.386 +} 1.387 +/*================================================================================= 1.388 + 1.389 +=================================================================================*/ 1.390 +int uCheckAndGen2ByteGRPrefix8EA2(int32_t* state, 1.391 + uint16_t in, 1.392 + unsigned char* out, 1.393 + uint32_t outbuflen, 1.394 + uint32_t* outlen 1.395 + ) 1.396 +{ 1.397 + if(outbuflen < 4) 1.398 + return 0; 1.399 + else 1.400 + { 1.401 + *outlen = 4; 1.402 + out[0] = 0x8E; 1.403 + out[1] = 0xA2; 1.404 + out[2] = ((in >> 8 ) & 0xff) | 0x80; 1.405 + out[3] = (in & 0xff) | 0x80; 1.406 + return 1; 1.407 + } 1.408 +} 1.409 + 1.410 + 1.411 +/*================================================================================= 
/*=================================================================================
  uGen2ByteGRPrefix8E: shared body of uCheckAndGen2ByteGRPrefix8EA3..8EA7.

  Writes 0x8E, then `second`, then the two bytes of `in` (high byte first),
  each with bit 7 set.
  Returns 0 when outbuflen < 4, otherwise sets *outlen = 4 and returns 1.
=================================================================================*/
static int uGen2ByteGRPrefix8E(
                               unsigned char second,
                               uint16_t in,
                               unsigned char* out,
                               uint32_t outbuflen,
                               uint32_t* outlen
                               )
{
  if(outbuflen < 4)
    return 0;

  *outlen = 4;
  out[0] = 0x8E;
  out[1] = second;
  out[2] = ((in >> 8 ) & 0xff) | 0x80;
  out[3] = (in & 0xff) | 0x80;
  return 1;
}

/*=================================================================================
  uCheckAndGen2ByteGRPrefix8EA3 .. 8EA7: four-byte output consisting of the
  prefix 0x8E 0xA3 (.. 0xA7) and the two bytes of `in` with bit 7 set.
  Each returns 0 when outbuflen < 4, otherwise sets *outlen = 4 and returns 1.
  `state` is not used.
=================================================================================*/
int uCheckAndGen2ByteGRPrefix8EA3(int32_t* state,
                                  uint16_t in,
                                  unsigned char* out,
                                  uint32_t outbuflen,
                                  uint32_t* outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8E(0xA3, in, out, outbuflen, outlen);
}

int uCheckAndGen2ByteGRPrefix8EA4(int32_t* state,
                                  uint16_t in,
                                  unsigned char* out,
                                  uint32_t outbuflen,
                                  uint32_t* outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8E(0xA4, in, out, outbuflen, outlen);
}

int uCheckAndGen2ByteGRPrefix8EA5(int32_t* state,
                                  uint16_t in,
                                  unsigned char* out,
                                  uint32_t outbuflen,
                                  uint32_t* outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8E(0xA5, in, out, outbuflen, outlen);
}

int uCheckAndGen2ByteGRPrefix8EA6(int32_t* state,
                                  uint16_t in,
                                  unsigned char* out,
                                  uint32_t outbuflen,
                                  uint32_t* outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8E(0xA6, in, out, outbuflen, outlen);
}

int uCheckAndGen2ByteGRPrefix8EA7(int32_t* state,
                                  uint16_t in,
                                  unsigned char* out,
                                  uint32_t outbuflen,
                                  uint32_t* outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8E(0xA7, in, out, outbuflen, outlen);
}

/*=================================================================================
  Constants of the Hangul syllable decomposition algorithm of the Unicode
  Standard: SBase is the first precomposed syllable; LCount, VCount and TCount
  are the numbers of leading consonants, vowels and trailing consonants
  (TCount includes the "no trailing consonant" case).
=================================================================================*/
#define SBase 0xAC00
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)

/* Returns non-zero when `in` lies in the precomposed Hangul syllable block
 * [SBase, SBase + LCount * NCount), i.e. U+AC00..U+D7A3. */
static int uIsHangulSyllable(uint16_t in)
{
  return (in >= SBase) && ((uint32_t)(in - SBase) < (uint32_t)(LCount * NCount));
}

/*=================================================================================
  uCnGAlways8BytesDecomposedHangul: write a Hangul syllable as the 8-byte
  sequence "filler + three jamos".

  Returns 0 when outbuflen < 8 or when `in` is not a precomposed Hangul
  syllable (such input used to index lMap out of bounds); otherwise sets
  *outlen = 8 and returns 1.  `state` is not used.
=================================================================================*/
int uCnGAlways8BytesDecomposedHangul(
                                     int32_t* state,
                                     uint16_t in,
                                     unsigned char* out,
                                     uint32_t outbuflen,
                                     uint32_t* outlen
                                     )
{
  /* second byte of the leading-consonant jamo, indexed by LIndex */
  static const uint8_t lMap[LCount] = {
    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
  };

  /* second byte of the trailing-consonant jamo, indexed by TIndex;
     entry 0 (no trailing consonant) is the filler byte 0xd4 */
  static const uint8_t tMap[TCount] = {
    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
    0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
    0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
  };

  uint16_t SIndex, LIndex, VIndex, TIndex;

  (void)state;
  if(outbuflen < 8)
    return 0;

  /* Outside the syllable block LIndex would exceed LCount - 1. */
  if(!uIsHangulSyllable(in))
    return 0;

  /* the following line is copied from Unicode 2.0 page 3-13 */
  /* item 1 of Hangul Syllable Decomposition */
  SIndex = in - SBase;

  /* the following lines are copied from Unicode 2.0 page 3-14 */
  /* item 2 of Hangul Syllable Decomposition w/ modification */
  LIndex = SIndex / NCount;
  VIndex = (SIndex % NCount) / TCount;
  TIndex = SIndex % TCount;

  /*
   * A Hangul syllable not enumerated in KS X 1001 is represented
   * by a sequence of 8 bytes beginning with Hangul-filler
   * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three
   * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making
   * up the syllable.  ref. KS X 1001:1998 Annex 3
   */
  *outlen = 8;
  out[0] = out[2] = out[4] = out[6] = 0xa4;
  out[1] = 0xd4;
  out[3] = lMap[LIndex] ;
  out[5] = (VIndex + 0xbf);
  out[7] = tMap[TIndex];

  return 1;
}

/*=================================================================================
  uCheckAndGenJohabHangul: write a Hangul syllable as a two-byte value built
  from three 5-bit fields: bit 15 set, (LIndex + 2) in bits 14-10,
  vMap[VIndex] in bits 9-5 and tMap[TIndex] in bits 4-0.

  Returns 0 when outbuflen < 2 or when `in` is not a precomposed Hangul
  syllable (such input used to produce meaningless bytes); otherwise sets
  *outlen = 2 and returns 1.  `state` is not used.
=================================================================================*/
int uCheckAndGenJohabHangul(
                            int32_t* state,
                            uint16_t in,
                            unsigned char* out,
                            uint32_t outbuflen,
                            uint32_t* outlen
                            )
{
  /*
    See Table 4-45 (page 183) of CJKV Information Processing
    for detail explanation of the following tables.

    The leading-consonant table would be
      lMap[LCount] = { 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20 };
    therefore lMap[i] == i+2 and no table is needed for it.
  */
  static const uint8_t vMap[VCount] = {
    /* no 0,1,2 */
    3,4,5,6,7,            /* no 8,9   */
    10,11,12,13,14,15,    /* no 16,17 */
    18,19,20,21,22,23,    /* no 24,25 */
    26,27,28,29
  };
  static const uint8_t tMap[TCount] = {
    1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */
    19,20,21,22,23,24,25,26,27,28,29
  };
  uint16_t SIndex, LIndex, VIndex, TIndex, ch;

  (void)state;
  if(outbuflen < 2)
    return 0;

  if(!uIsHangulSyllable(in))
    return 0;

  /* the following line is copied from Unicode 2.0 page 3-13 */
  /* item 1 of Hangul Syllable Decomposition */
  SIndex = in - SBase;

  /* the following lines are copied from Unicode 2.0 page 3-14 */
  /* item 2 of Hangul Syllable Decomposition w/ modification */
  LIndex = SIndex / NCount;
  VIndex = (SIndex % NCount) / TCount;
  TIndex = SIndex % TCount;

  *outlen = 2;
  ch = 0x8000 |
       ((LIndex+2)<<10) |
       (vMap[VIndex]<<5)|
       tMap[TIndex];
  out[0] = (ch >> 8);
  out[1] = ch & 0x00FF;
  return 1;
}

/*=================================================================================
  uCheckAndGenJohabSymbol: convert a two-byte KS X 1001 code (`in`, of which
  only the low 7 bits of each byte are used) to its two-byte Johab form.

  Returns 0 when outbuflen < 2, otherwise sets *outlen = 2 and returns 1.
  `state` is not used.
=================================================================================*/
int uCheckAndGenJohabSymbol(
                            int32_t* state,
                            uint16_t in,
                            unsigned char* out,
                            uint32_t outbuflen,
                            uint32_t* outlen
                            )
{
  /* The following code is based on the Perl code listed under
   * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013)
   * in the book "CJKV Information Processing" by
   * Ken Lunde <lunde@adobe.com>
   *
   * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
   *  my @euc = unpack("C*", $_[0]);
   *  my ($fe_off, $hi_off, $lo_off) = (0,0,1);
   *  my @out = ();
   *  while(($hi, $lo) = splice(@euc, 0, 2)) {
   *    $hi &= 127; $lo &= 127;
   *    $fe_off = 21 if $hi == 73;
   *    $fe_off = 34 if $hi == 126;
   *    ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
   *    push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
   *      $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128));
   *  }
   *  return pack("C*", @out);
   */
  unsigned char fe_off = 0;
  unsigned char hi_off = 0;
  unsigned char lo_off = 1;
  unsigned char hi = (in >> 8) & 0x7F;
  unsigned char lo = in & 0x7F;

  (void)state;
  if(outbuflen < 2)
    return 0;

  if(73 == hi)
    fe_off = 21;
  if(126 == hi)
    fe_off = 34;
  if( (hi < 74) || ( hi > 125) )
  {
    /* swap the two offsets, as the Perl original does */
    hi_off = 1;
    lo_off = 0;
  }
  *outlen = 2;
  out[0] =  ((hi+hi_off) >> 1) + ((hi<74) ? 200 : 187 ) - fe_off;
  out[1] =  lo + (((hi+lo_off) & 1) ? ((lo > 110) ? 34 : 16) :
                                      128);
  return 1;
}

/*=================================================================================
  uCheckAndGen4BytesGB18030: write `in` as four bytes using the mixed radix
  (.., 10, 126, 10): byte 0 = 0x81 + in / 12600, byte 1 = 0x30 + digit,
  byte 2 = 0x81 + (0..125), byte 3 = 0x30 + digit.
  NOTE(review): `in` is presumably the linear offset of the character inside
  the GB18030 four-byte area - confirm against the caller's mapping table.

  Returns 0 when outbuflen < 4, otherwise sets *outlen = 4 and returns 1.
  `state` is not used.
=================================================================================*/
int uCheckAndGen4BytesGB18030(
                              int32_t* state,
                              uint16_t in,
                              unsigned char* out,
                              uint32_t outbuflen,
                              uint32_t* outlen
                              )
{
  (void)state;
  if(outbuflen < 4)
    return 0;

  out[0] = (in / (10*126*10)) + 0x81;
  in %= (10*126*10);
  out[1] = (in / (10*126)) + 0x30;
  in %= (10*126);
  out[2] = (in / (10)) + 0x81;
  out[3] = (in % 10) + 0x30;
  *outlen = 4;
  return 1;
}