Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | #include "unicpriv.h" |
/* Non-zero when byte b lies strictly between 0xA0 and 0xFF, i.e. in
 * 0xA1..0xFE, the GR range of a 94-character set. b is evaluated up to
 * two times. */
#define CHK_GR94(b) ( ((uint8_t) (b)) > (uint8_t) 0xa0 && ((uint8_t) (b)) < (uint8_t) 0xff )
/* Non-zero when both b1 and b2 pass CHK_GR94. */
#define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2))
michael@0 | 8 | /*================================================================================= |
michael@0 | 9 | |
michael@0 | 10 | =================================================================================*/ |
/*
 * Sub-scanner callback: reads bytes starting at `in` and stores one 16-bit
 * code in `*out`. The sub-scanners in this file always return 1.
 */
typedef int (*uSubScannerFunc)(unsigned char *in, uint16_t *out);

/*
 * Scanner callback.
 *   state     - scanner state pointer (not used by the scanners in this file)
 *   in        - input bytes
 *   out       - receives the scanned 16-bit code
 *   inbuflen  - number of bytes available at `in`
 *   inscanlen - receives the number of input bytes consumed
 * The scanners in this file return 1 when a code was produced, 0 otherwise.
 */
typedef int (*uScannerFunc)(int32_t *state,
                            unsigned char *in,
                            uint16_t *out,
                            uint32_t inbuflen,
                            uint32_t *inscanlen);
michael@0 | 23 | |
michael@0 | 24 | int uScan( |
michael@0 | 25 | uScanClassID scanClass, |
michael@0 | 26 | int32_t* state, |
michael@0 | 27 | unsigned char *in, |
michael@0 | 28 | uint16_t *out, |
michael@0 | 29 | uint32_t inbuflen, |
michael@0 | 30 | uint32_t* inscanlen |
michael@0 | 31 | ); |
michael@0 | 32 | |
michael@0 | 33 | #define uSubScanner(sub,in,out) (* m_subscanner[sub])((in),(out)) |
michael@0 | 34 | |
michael@0 | 35 | int uCheckAndScanAlways1Byte( |
michael@0 | 36 | int32_t* state, |
michael@0 | 37 | unsigned char *in, |
michael@0 | 38 | uint16_t *out, |
michael@0 | 39 | uint32_t inbuflen, |
michael@0 | 40 | uint32_t* inscanlen |
michael@0 | 41 | ); |
michael@0 | 42 | int uCheckAndScanAlways2Byte( |
michael@0 | 43 | int32_t* state, |
michael@0 | 44 | unsigned char *in, |
michael@0 | 45 | uint16_t *out, |
michael@0 | 46 | uint32_t inbuflen, |
michael@0 | 47 | uint32_t* inscanlen |
michael@0 | 48 | ); |
michael@0 | 49 | int uCheckAndScanAlways2ByteShiftGR( |
michael@0 | 50 | int32_t* state, |
michael@0 | 51 | unsigned char *in, |
michael@0 | 52 | uint16_t *out, |
michael@0 | 53 | uint32_t inbuflen, |
michael@0 | 54 | uint32_t* inscanlen |
michael@0 | 55 | ); |
michael@0 | 56 | int uCheckAndScanAlways2ByteGR128( |
michael@0 | 57 | int32_t* state, |
michael@0 | 58 | unsigned char *in, |
michael@0 | 59 | uint16_t *out, |
michael@0 | 60 | uint32_t inbuflen, |
michael@0 | 61 | uint32_t* inscanlen |
michael@0 | 62 | ); |
michael@0 | 63 | int uScanShift( |
michael@0 | 64 | uShiftInTable *shift, |
michael@0 | 65 | int32_t* state, |
michael@0 | 66 | unsigned char *in, |
michael@0 | 67 | uint16_t *out, |
michael@0 | 68 | uint32_t inbuflen, |
michael@0 | 69 | uint32_t* inscanlen |
michael@0 | 70 | ); |
michael@0 | 71 | |
michael@0 | 72 | int uCheckAndScan2ByteGRPrefix8F( |
michael@0 | 73 | int32_t* state, |
michael@0 | 74 | unsigned char *in, |
michael@0 | 75 | uint16_t *out, |
michael@0 | 76 | uint32_t inbuflen, |
michael@0 | 77 | uint32_t* inscanlen |
michael@0 | 78 | ); |
michael@0 | 79 | int uCheckAndScan2ByteGRPrefix8EA2( |
michael@0 | 80 | int32_t* state, |
michael@0 | 81 | unsigned char *in, |
michael@0 | 82 | uint16_t *out, |
michael@0 | 83 | uint32_t inbuflen, |
michael@0 | 84 | uint32_t* inscanlen |
michael@0 | 85 | ); |
michael@0 | 86 | int uCheckAndScan2ByteGRPrefix8EA3( |
michael@0 | 87 | int32_t* state, |
michael@0 | 88 | unsigned char *in, |
michael@0 | 89 | uint16_t *out, |
michael@0 | 90 | uint32_t inbuflen, |
michael@0 | 91 | uint32_t* inscanlen |
michael@0 | 92 | ); |
michael@0 | 93 | int uCheckAndScan2ByteGRPrefix8EA4( |
michael@0 | 94 | int32_t* state, |
michael@0 | 95 | unsigned char *in, |
michael@0 | 96 | uint16_t *out, |
michael@0 | 97 | uint32_t inbuflen, |
michael@0 | 98 | uint32_t* inscanlen |
michael@0 | 99 | ); |
michael@0 | 100 | int uCheckAndScan2ByteGRPrefix8EA5( |
michael@0 | 101 | int32_t* state, |
michael@0 | 102 | unsigned char *in, |
michael@0 | 103 | uint16_t *out, |
michael@0 | 104 | uint32_t inbuflen, |
michael@0 | 105 | uint32_t* inscanlen |
michael@0 | 106 | ); |
michael@0 | 107 | int uCheckAndScan2ByteGRPrefix8EA6( |
michael@0 | 108 | int32_t* state, |
michael@0 | 109 | unsigned char *in, |
michael@0 | 110 | uint16_t *out, |
michael@0 | 111 | uint32_t inbuflen, |
michael@0 | 112 | uint32_t* inscanlen |
michael@0 | 113 | ); |
michael@0 | 114 | int uCheckAndScan2ByteGRPrefix8EA7( |
michael@0 | 115 | int32_t* state, |
michael@0 | 116 | unsigned char *in, |
michael@0 | 117 | uint16_t *out, |
michael@0 | 118 | uint32_t inbuflen, |
michael@0 | 119 | uint32_t* inscanlen |
michael@0 | 120 | ); |
michael@0 | 121 | int uCnSAlways8BytesDecomposedHangul( |
michael@0 | 122 | int32_t* state, |
michael@0 | 123 | unsigned char *in, |
michael@0 | 124 | uint16_t *out, |
michael@0 | 125 | uint32_t inbuflen, |
michael@0 | 126 | uint32_t* inscanlen |
michael@0 | 127 | ); |
michael@0 | 128 | int uCheckAndScanJohabHangul( |
michael@0 | 129 | int32_t* state, |
michael@0 | 130 | unsigned char *in, |
michael@0 | 131 | uint16_t *out, |
michael@0 | 132 | uint32_t inbuflen, |
michael@0 | 133 | uint32_t* inscanlen |
michael@0 | 134 | ); |
michael@0 | 135 | int uCheckAndScanJohabSymbol( |
michael@0 | 136 | int32_t* state, |
michael@0 | 137 | unsigned char *in, |
michael@0 | 138 | uint16_t *out, |
michael@0 | 139 | uint32_t inbuflen, |
michael@0 | 140 | uint32_t* inscanlen |
michael@0 | 141 | ); |
michael@0 | 142 | |
michael@0 | 143 | int uCheckAndScan4BytesGB18030( |
michael@0 | 144 | int32_t* state, |
michael@0 | 145 | unsigned char *in, |
michael@0 | 146 | uint16_t *out, |
michael@0 | 147 | uint32_t inbuflen, |
michael@0 | 148 | uint32_t* inscanlen |
michael@0 | 149 | ); |
michael@0 | 150 | |
michael@0 | 151 | int uScanAlways2Byte( |
michael@0 | 152 | unsigned char* in, |
michael@0 | 153 | uint16_t* out |
michael@0 | 154 | ); |
michael@0 | 155 | int uScanAlways2ByteShiftGR( |
michael@0 | 156 | unsigned char* in, |
michael@0 | 157 | uint16_t* out |
michael@0 | 158 | ); |
michael@0 | 159 | int uScanAlways1Byte( |
michael@0 | 160 | unsigned char* in, |
michael@0 | 161 | uint16_t* out |
michael@0 | 162 | ); |
michael@0 | 163 | int uScanAlways1BytePrefix8E( |
michael@0 | 164 | unsigned char* in, |
michael@0 | 165 | uint16_t* out |
michael@0 | 166 | ); |
michael@0 | 167 | /*================================================================================= |
michael@0 | 168 | |
michael@0 | 169 | =================================================================================*/ |
michael@0 | 170 | const uScannerFunc m_scanner[uNumOfCharsetType] = |
michael@0 | 171 | { |
michael@0 | 172 | uCheckAndScanAlways1Byte, |
michael@0 | 173 | uCheckAndScanAlways2Byte, |
michael@0 | 174 | uCheckAndScanAlways2ByteShiftGR, |
michael@0 | 175 | uCheckAndScan2ByteGRPrefix8F, |
michael@0 | 176 | uCheckAndScan2ByteGRPrefix8EA2, |
michael@0 | 177 | uCheckAndScan2ByteGRPrefix8EA3, |
michael@0 | 178 | uCheckAndScan2ByteGRPrefix8EA4, |
michael@0 | 179 | uCheckAndScan2ByteGRPrefix8EA5, |
michael@0 | 180 | uCheckAndScan2ByteGRPrefix8EA6, |
michael@0 | 181 | uCheckAndScan2ByteGRPrefix8EA7, |
michael@0 | 182 | uCnSAlways8BytesDecomposedHangul, |
michael@0 | 183 | uCheckAndScanJohabHangul, |
michael@0 | 184 | uCheckAndScanJohabSymbol, |
michael@0 | 185 | uCheckAndScan4BytesGB18030, |
michael@0 | 186 | uCheckAndScanAlways2ByteGR128 |
michael@0 | 187 | }; |
michael@0 | 188 | |
michael@0 | 189 | /*================================================================================= |
michael@0 | 190 | |
michael@0 | 191 | =================================================================================*/ |
michael@0 | 192 | |
michael@0 | 193 | const uSubScannerFunc m_subscanner[uNumOfCharType] = |
michael@0 | 194 | { |
michael@0 | 195 | uScanAlways1Byte, |
michael@0 | 196 | uScanAlways2Byte, |
michael@0 | 197 | uScanAlways2ByteShiftGR, |
michael@0 | 198 | uScanAlways1BytePrefix8E |
michael@0 | 199 | }; |
michael@0 | 200 | /*================================================================================= |
michael@0 | 201 | |
michael@0 | 202 | =================================================================================*/ |
michael@0 | 203 | int uScan( |
michael@0 | 204 | uScanClassID scanClass, |
michael@0 | 205 | int32_t* state, |
michael@0 | 206 | unsigned char *in, |
michael@0 | 207 | uint16_t *out, |
michael@0 | 208 | uint32_t inbuflen, |
michael@0 | 209 | uint32_t* inscanlen |
michael@0 | 210 | ) |
michael@0 | 211 | { |
michael@0 | 212 | return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen); |
michael@0 | 213 | } |
michael@0 | 214 | /*================================================================================= |
michael@0 | 215 | |
michael@0 | 216 | =================================================================================*/ |
/*
 * Sub-scanner: the code is simply the first input byte.
 * Always returns 1.
 */
int uScanAlways1Byte(
  unsigned char* in,
  uint16_t* out
)
{
  const unsigned char only = in[0];

  *out = (uint16_t) only;
  return 1;
}
michael@0 | 225 | |
michael@0 | 226 | /*================================================================================= |
michael@0 | 227 | |
michael@0 | 228 | =================================================================================*/ |
/*
 * Sub-scanner: combines in[0] (high byte) and in[1] (low byte) into one
 * 16-bit code. Always returns 1.
 */
int uScanAlways2Byte(
  unsigned char* in,
  uint16_t* out
)
{
  const unsigned int high = in[0];
  const unsigned int low = in[1];

  *out = (uint16_t) ((high << 8) | low);
  return 1;
}
michael@0 | 237 | /*================================================================================= |
michael@0 | 238 | |
michael@0 | 239 | =================================================================================*/ |
/*
 * Sub-scanner: combines in[0] and in[1] into a 16-bit code with the top bit
 * of each byte cleared (GR -> GL shift). Always returns 1.
 */
int uScanAlways2ByteShiftGR(
  unsigned char* in,
  uint16_t* out
)
{
  const unsigned int high = in[0] & 0x7F;
  const unsigned int low = in[1] & 0x7F;

  *out = (uint16_t) ((high << 8) | low);
  return 1;
}
michael@0 | 248 | |
michael@0 | 249 | /*================================================================================= |
michael@0 | 250 | |
michael@0 | 251 | =================================================================================*/ |
/*
 * Sub-scanner for a one-byte code preceded by a prefix byte: in[0] is
 * skipped without being inspected and the code is in[1].
 * Always returns 1.
 */
int uScanAlways1BytePrefix8E(
  unsigned char* in,
  uint16_t* out
)
{
  const unsigned char payload = in[1];

  *out = (uint16_t) payload;
  return 1;
}
michael@0 | 260 | /*================================================================================= |
michael@0 | 261 | |
michael@0 | 262 | =================================================================================*/ |
/*
 * Scanner for single-byte codes: consumes exactly one byte and stores it in
 * *out. inbuflen is deliberately not checked; the caller must guarantee that
 * at least one byte is available. state is unused. Always returns 1.
 */
int uCheckAndScanAlways1Byte(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  const unsigned char only = in[0];

  *inscanlen = 1;
  *out = (uint16_t) only;
  return 1;
}
michael@0 | 277 | |
michael@0 | 278 | /*================================================================================= |
michael@0 | 279 | |
michael@0 | 280 | =================================================================================*/ |
/*
 * Scanner for plain two-byte codes.
 * Returns 0 (leaving *out and *inscanlen untouched) when fewer than two
 * bytes are available; otherwise consumes two bytes, stores
 * (in[0] << 8) | in[1] in *out and returns 1. state is unused.
 */
int uCheckAndScanAlways2Byte(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  if (inbuflen < 2)
    return 0;

  *inscanlen = 2;
  *out = (uint16_t) ((in[0] << 8) | in[1]);
  return 1;
}
michael@0 | 298 | /*================================================================================= |
michael@0 | 299 | |
michael@0 | 300 | =================================================================================*/ |
/*
 * Scanner for two-byte codes of a 94x94 character set invoked on GR.
 *
 * Both bytes of such a code lie in [0xa1,0xfe]. Only the second byte is
 * validated here; per the original note, the first byte has already been
 * range-checked by the calling decoder helper.
 *
 * Returns 0 when fewer than two bytes are available (the caller then asks
 * for more input). Otherwise consumes two bytes and returns 1, with *out set
 * to the code with both top bits cleared, or to 0xFF when the second byte is
 * out of range (per the original note, uMap() is guaranteed to fail for 0xFF
 * with a 2-byte table). state is unused.
 */
int uCheckAndScanAlways2ByteShiftGR(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  if (inbuflen < 2)
    return 0;

  *inscanlen = 2;
  if (CHK_GR94(in[1]))
    *out = (uint16_t) (((in[0] << 8) | in[1]) & 0x7F7F);
  else
    *out = 0xFF;
  return 1;
}
michael@0 | 330 | /*================================================================================= |
michael@0 | 331 | |
michael@0 | 332 | =================================================================================*/ |
/*
 * Scanner for two-byte codes whose first byte is in [0xa1,0xfe] and whose
 * second byte is in [0x41,0xfe] (used by the CP949 -> Unicode converter).
 *
 * Only the lower bound of the second byte is checked here; per the original
 * note, the first byte has already been range-checked by the calling decoder
 * helper.
 *
 * Returns 0 when fewer than two bytes are available (the caller then asks
 * for more input). Otherwise consumes two bytes and returns 1, with *out set
 * to (in[0] << 8) | in[1], or to 0xFF when the second byte is below 0x41
 * (per the original note, uMap() is guaranteed to fail for 0xFF with a
 * 2-byte table). state is unused.
 */
int uCheckAndScanAlways2ByteGR128(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  if (inbuflen < 2)
    return 0;

  *inscanlen = 2;
  if (in[1] >= 0x41)
    *out = (uint16_t) ((in[0] << 8) | in[1]);
  else
    *out = 0xFF;
  return 1;
}
michael@0 | 363 | /*================================================================================= |
michael@0 | 364 | |
michael@0 | 365 | =================================================================================*/ |
michael@0 | 366 | int uScanShift( |
michael@0 | 367 | uShiftInTable *shift, |
michael@0 | 368 | int32_t* state, |
michael@0 | 369 | unsigned char *in, |
michael@0 | 370 | uint16_t *out, |
michael@0 | 371 | uint32_t inbuflen, |
michael@0 | 372 | uint32_t* inscanlen |
michael@0 | 373 | ) |
michael@0 | 374 | { |
michael@0 | 375 | int16_t i; |
michael@0 | 376 | const uShiftInCell* cell = &(shift->shiftcell[0]); |
michael@0 | 377 | int16_t itemnum = shift->numOfItem; |
michael@0 | 378 | for(i=0;i<itemnum;i++) |
michael@0 | 379 | { |
michael@0 | 380 | if( ( in[0] >= cell[i].shiftin_Min) && |
michael@0 | 381 | ( in[0] <= cell[i].shiftin_Max)) |
michael@0 | 382 | { |
michael@0 | 383 | if(inbuflen < cell[i].reserveLen) |
michael@0 | 384 | return 0; |
michael@0 | 385 | else |
michael@0 | 386 | { |
michael@0 | 387 | *inscanlen = cell[i].reserveLen; |
michael@0 | 388 | return (uSubScanner(cell[i].classID,in,out)); |
michael@0 | 389 | } |
michael@0 | 390 | } |
michael@0 | 391 | } |
michael@0 | 392 | return 0; |
michael@0 | 393 | } |
michael@0 | 394 | /*================================================================================= |
michael@0 | 395 | |
michael@0 | 396 | =================================================================================*/ |
/*
 * Scanner for three-byte sequences: the prefix byte 0x8F followed by a
 * two-byte code from a 94x94 set on GR.
 *
 * Returns 0 when fewer than three bytes are available or in[0] is not 0x8F.
 * Otherwise returns 1 with:
 *   - 2nd byte outside [0xa1,0xfe]: *inscanlen = 2, *out = 0xFF
 *   - 3rd byte outside [0xa1,0xfe]: *inscanlen = 3, *out = 0xFF
 *   - both valid:                   *inscanlen = 3,
 *                                   *out = ((in[1] << 8) | in[2]) & 0x7F7F
 * Per the original note, uMap() is guaranteed to fail for 0xFF with a 2-byte
 * table. state is unused.
 */
int uCheckAndScan2ByteGRPrefix8F(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  if ((inbuflen < 3) || (in[0] != 0x8F))
    return 0;

  if (! CHK_GR94(in[1]))
  {
    /* Bad 2nd byte: consume only the prefix and that byte. */
    *inscanlen = 2;
    *out = 0xFF;
    return 1;
  }

  *inscanlen = 3;
  if (CHK_GR94(in[2]))
    *out = (uint16_t) (((in[1] << 8) | in[2]) & 0x7F7F);
  else
    *out = 0xFF;
  return 1;
}
michael@0 | 426 | /*================================================================================= |
michael@0 | 427 | |
michael@0 | 428 | =================================================================================*/ |
michael@0 | 429 | |
/*
 * Shared implementation of uCheckAndScan2ByteGRPrefix8EAX(), X = 2..7.
 *
 * Scans a four-byte sequence: 0x8E, a plane-selector byte `prefix`, then a
 * two-byte code from a 94x94 set on GR.
 *
 * Returns 0 when fewer than four bytes are available or in[0] is not 0x8E.
 * Otherwise returns 1 with:
 *   - in[1] != prefix:              *inscanlen = 2, *out = 0xFF
 *   - in[2] outside [0xa1,0xfe]:    *inscanlen = 3, *out = 0xFF
 *   - in[3] outside [0xa1,0xfe]:    *inscanlen = 4, *out = 0xFF
 *   - all valid:                    *inscanlen = 4,
 *                                   *out = ((in[2] << 8) | in[3]) & 0x7F7F
 *
 * This is a real function rather than a statement macro so that the early
 * returns are visible at the call site and the arguments are type-checked.
 */
static int uScan2ByteGRPrefix8EAX(
  unsigned char prefix,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  if ((inbuflen < 4) || (in[0] != 0x8E))
    return 0;

  if (in[1] != prefix)
  {
    *inscanlen = 2;
    *out = 0xFF;
    return 1;
  }

  if (! CHK_GR94(in[2]))
  {
    *inscanlen = 3;
    *out = 0xFF;
    return 1;
  }

  *inscanlen = 4;
  if (CHK_GR94(in[3]))
    *out = (uint16_t) (((in[2] << 8) | in[3]) & 0x7F7F);
  else
    *out = 0xFF;
  return 1;
}

/* Scanner for sequences prefixed with 0x8E 0xA2. state is unused. */
int uCheckAndScan2ByteGRPrefix8EA2(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  return uScan2ByteGRPrefix8EAX(0xA2, in, out, inbuflen, inscanlen);
}

/*=================================================================================

=================================================================================*/
/* Scanner for sequences prefixed with 0x8E 0xA3. state is unused. */
int uCheckAndScan2ByteGRPrefix8EA3(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  return uScan2ByteGRPrefix8EAX(0xA3, in, out, inbuflen, inscanlen);
}
/*=================================================================================

=================================================================================*/
/* Scanner for sequences prefixed with 0x8E 0xA4. state is unused. */
int uCheckAndScan2ByteGRPrefix8EA4(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  return uScan2ByteGRPrefix8EAX(0xA4, in, out, inbuflen, inscanlen);
}
/*=================================================================================

=================================================================================*/
/* Scanner for sequences prefixed with 0x8E 0xA5. state is unused. */
int uCheckAndScan2ByteGRPrefix8EA5(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  return uScan2ByteGRPrefix8EAX(0xA5, in, out, inbuflen, inscanlen);
}
/*=================================================================================

=================================================================================*/
/* Scanner for sequences prefixed with 0x8E 0xA6. state is unused. */
int uCheckAndScan2ByteGRPrefix8EA6(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  return uScan2ByteGRPrefix8EAX(0xA6, in, out, inbuflen, inscanlen);
}
/*=================================================================================

=================================================================================*/
/* Scanner for sequences prefixed with 0x8E 0xA7. state is unused. */
int uCheckAndScan2ByteGRPrefix8EA7(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  return uScan2ByteGRPrefix8EAX(0xA7, in, out, inbuflen, inscanlen);
}
michael@0 | 537 | /*================================================================================= |
michael@0 | 538 | |
michael@0 | 539 | =================================================================================*/ |
/* Constants of the Hangul syllable composition formula used below
 * (the original comment cites Unicode 2.0 page 3-13 item 5). */
#define SBase 0xAC00               /* value added as the base of the result */
#define SCount 11172
#define LCount 19
#define VCount 21                  /* multiplier for the leading index */
#define TCount 28                  /* multiplier for the leading/vowel pair */
#define NCount (VCount * TCount)

/*
 * Scanner for an 8-byte decomposed Hangul sequence of the form
 *   a4 d4  a4 <lead>  a4 <vowel>  a4 <trail>
 * and composes it into a single 16-bit code.
 *
 *   <lead>  must be in [0xa1,0xbe] and map to a leading-consonant index
 *   <vowel> must be in [0xbf,0xd3]
 *   <trail> is 0xd4 (no trailing consonant) or a byte in [0xa1,0xbe] that
 *           maps to a trailing-consonant index
 *
 * Returns 0 (leaving *out and *inscanlen untouched) when fewer than eight
 * bytes are available or any of the checks above fails; otherwise sets
 * *inscanlen to 8, stores the composed code in *out and returns 1.
 * state is unused.
 */
int uCnSAlways8BytesDecomposedHangul(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  /* Leading-consonant index for bytes 0xa1..0xbe; 0xff marks "not a lead". */
  static const uint8_t leadIndex[] = {
    /* A1   A2   A3   A4   A5   A6   A7 */
        0,   1,0xff,   2,0xff,0xff,   3,
    /* A8   A9   AA   AB   AC   AD   AE   AF */
        4,   5,0xff,0xff,0xff,0xff,0xff,0xff,
    /* B0   B1   B2   B3   B4   B5   B6   B7 */
     0xff,   6,   7,   8,0xff,   9,  10,  11,
    /* B8   B9   BA   BB   BC   BD   BE */
       12,  13,  14,  15,  16,  17,  18
  };
  /* Trailing-consonant index for bytes 0xa1..0xbe; 0xff marks "not a trail". */
  static const uint8_t trailIndex[] = {
    /* A1   A2   A3   A4   A5   A6   A7 */
        1,   2,   3,   4,   5,   6,   7,
    /* A8   A9   AA   AB   AC   AD   AE   AF */
     0xff,   8,   9,  10,  11,  12,  13,  14,
    /* B0   B1   B2   B3   B4   B5   B6   B7 */
       15,  16,  17,0xff,  18,  19,  20,  21,
    /* B8   B9   BA   BB   BC   BD   BE */
       22,0xff,  23,  24,  25,  26,  27
  };
  uint16_t LIndex, VIndex, TIndex;

  /* Need all 8 bytes, the a4 d4 filler, and a4 in front of each jamo. */
  if (inbuflen < 8)
    return 0;
  if ((in[0] != 0xa4) || (in[1] != 0xd4) ||
      (in[2] != 0xa4) || (in[4] != 0xa4) || (in[6] != 0xa4))
    return 0;

  /* Leading consonant. */
  if ((in[3] < 0xa1) || (in[3] > 0xbe))
    return 0;
  LIndex = leadIndex[in[3] - 0xa1];
  if (LIndex == 0xff)
    return 0;

  /* Medial vowel. */
  if ((in[5] < 0xbf) || (in[5] > 0xd3))
    return 0;
  VIndex = in[5] - 0xbf;

  /* Trailing consonant; 0xd4 means there is none. */
  if (in[7] == 0xd4)
  {
    TIndex = 0;
  }
  else
  {
    if ((in[7] < 0xa1) || (in[7] > 0xbe))
      return 0;
    TIndex = trailIndex[in[7] - 0xa1];
    if (TIndex == 0xff)
      return 0;
  }

  *inscanlen = 8;
  /* Composition formula from Unicode 2.0 page 3-13 item 5. */
  *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase;

  return 1;
}
michael@0 | 621 | /*================================================================================= |
michael@0 | 622 | |
michael@0 | 623 | =================================================================================*/ |
michael@0 | 624 | |
michael@0 | 625 | int uCheckAndScanJohabHangul( |
michael@0 | 626 | int32_t* state, |
michael@0 | 627 | unsigned char *in, |
michael@0 | 628 | uint16_t *out, |
michael@0 | 629 | uint32_t inbuflen, |
michael@0 | 630 | uint32_t* inscanlen |
michael@0 | 631 | ) |
michael@0 | 632 | { |
michael@0 | 633 | /* since we don't have code to convert Johab to Unicode right now * |
michael@0 | 634 | * make this part of code #if 0 to save space until we fully test it */ |
michael@0 | 635 | if(inbuflen < 2) |
michael@0 | 636 | return 0; |
michael@0 | 637 | else { |
michael@0 | 638 | /* |
michael@0 | 639 | * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183 |
michael@0 | 640 | * of "CJKV Information Processing" for details |
michael@0 | 641 | */ |
michael@0 | 642 | static const uint8_t lMap[32]={ /* totaly 19 */ |
michael@0 | 643 | 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */ |
michael@0 | 644 | 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */ |
michael@0 | 645 | 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */ |
michael@0 | 646 | 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */ |
michael@0 | 647 | }; |
michael@0 | 648 | static const uint8_t vMap[32]={ /* totaly 21 */ |
michael@0 | 649 | 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */ |
michael@0 | 650 | 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */ |
michael@0 | 651 | 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */ |
michael@0 | 652 | 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */ |
michael@0 | 653 | }; |
michael@0 | 654 | static const uint8_t tMap[32]={ /* totaly 29 */ |
michael@0 | 655 | 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */ |
michael@0 | 656 | 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */ |
michael@0 | 657 | 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */ |
michael@0 | 658 | 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */ |
michael@0 | 659 | }; |
michael@0 | 660 | uint16_t ch = (in[0] << 8) | in[1]; |
michael@0 | 661 | uint16_t LIndex, VIndex, TIndex; |
michael@0 | 662 | if(0 == (0x8000 & ch)) |
michael@0 | 663 | return 0; |
michael@0 | 664 | LIndex=lMap[(ch>>10)& 0x1F]; |
michael@0 | 665 | VIndex=vMap[(ch>>5) & 0x1F]; |
michael@0 | 666 | TIndex=tMap[(ch>>0) & 0x1F]; |
michael@0 | 667 | if((0xff==(LIndex)) || |
michael@0 | 668 | (0xff==(VIndex)) || |
michael@0 | 669 | (0xff==(TIndex))) |
michael@0 | 670 | return 0; |
michael@0 | 671 | /* the following line is from Unicode 2.0 page 3-13 item 5 */ |
michael@0 | 672 | *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; |
michael@0 | 673 | *inscanlen = 2; |
michael@0 | 674 | return 1; |
michael@0 | 675 | } |
michael@0 | 676 | } |
/*
 * Scanner for two-byte Johab symbol/hanja codes: converts the Johab byte
 * pair (hi, lo) into a 16-bit row/column code, row in the high byte and
 * column in the low byte.
 *
 * The arithmetic follows the Perl routine listed under "Johab to
 * ISO-2022-KR or EUC-KR Conversion" on page 1014 of "CJKV Information
 * Processing" by Ken Lunde:
 *
 *   sub johab2ks ($) { # Convert Johab to ISO-2022-KR
 *     my @johab = unpack("C*", $_[0]);
 *     my ($offset, $d8_off) = (0,0);
 *     my @out = ();
 *     while(($hi, $lo) = splice($johab, 0, 2)) {
 *       $offset = 1 if ($hi > 223 and $hi < 250);
 *       $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
 *       push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
 *             ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
 *             $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
 *     }
 *     return pack ("C*", @out);
 *   }
 *
 * $d8_off has three possible values:
 *    0 if $hi is not equal to 216
 *   94 if $hi is equal to 216 and $lo is greater than 160
 *   42 if $hi is equal to 216 and $lo is not greater than 160
 *
 * Returns 0 (leaving *out and *inscanlen untouched) when fewer than two
 * bytes are available; otherwise stores the converted code in *out, sets
 * *inscanlen to 2 and returns 1. The input bytes are not range-checked
 * here. state is unused.
 */
int uCheckAndScanJohabSymbol(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  if (inbuflen < 2)
    return 0;
  else {
    const unsigned char hi = in[0];
    const unsigned char lo = in[1];
    const int offset = ((hi > 223) && (hi < 250)) ? 1 : 0;
    int d8_off = 0;
    int row;
    int col;

    if (216 == hi)
      d8_off = (lo > 160) ? 94 : 42;

    /*
     * row is negative when hi < 200 and col is negative when lo < 16, so
     * avoid left-shifting a signed value (undefined for negatives):
     * multiply by 2 here and do the final shift on an unsigned value. The
     * low 16 bits are the same as what the signed shifts produce on two's
     * complement targets.
     */
    row = (hi - ((hi < 223) ? 200 : 187)) * 2 -
          ((lo < 161) ? 1 : 0) + offset + d8_off;
    col = lo - ((lo < 161) ? ((lo > 126) ? 34 : 16) : 128);

    *out = (uint16_t) ((((unsigned int) row) << 8) | (unsigned int) col);
    *inscanlen = 2;
    return 1;
  }
}
/*
 * Scanner for GB18030 four-byte sequences.
 *
 * A sequence is accepted when bytes 1 and 3 are in [0x81,0xfe] and bytes 2
 * and 4 are in [0x30,0x39]. The four bytes are folded into a linear index
 * (radices 10, 126, 10 from the second byte onward).
 *
 * Returns 0 (leaving *out and *inscanlen untouched) when fewer than four
 * bytes are available or any byte is out of range. Otherwise sets
 * *inscanlen to 4, stores the linear index in *out if it is below 0x10000
 * or 0xFFFD if it is not, and returns 1. state is unused.
 */
int uCheckAndScan4BytesGB18030(
  int32_t* state,
  unsigned char *in,
  uint16_t *out,
  uint32_t inbuflen,
  uint32_t* inscanlen
)
{
  uint32_t linear;

  if (inbuflen < 4)
    return 0;

  /* Bytes 1 and 3: 0x81..0xfe. Bytes 2 and 4: 0x30..0x39. */
  if ((in[0] < 0x81) || (in[0] > 0xfe) ||
      (in[1] < 0x30) || (in[1] > 0x39) ||
      (in[2] < 0x81) || (in[2] > 0xfe) ||
      (in[3] < 0x30) || (in[3] > 0x39))
    return 0;

  linear = (uint32_t) (in[0] - 0x81);
  linear = linear * 10 + (uint32_t) (in[1] - 0x30);
  linear = linear * 126 + (uint32_t) (in[2] - 0x81);
  linear = linear * 10 + (uint32_t) (in[3] - 0x30);

  *inscanlen = 4;
  *out = (linear < 0x00010000) ? linear : 0xFFFD;
  return 1;
}