intl/uconv/util/ugen.c

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include "unicpriv.h"
michael@0 6 /*=================================================================================
michael@0 7
michael@0 8 =================================================================================*/
michael@0 9 typedef int (*uSubGeneratorFunc) (uint16_t in, unsigned char* out);
michael@0 10 /*=================================================================================
michael@0 11
michael@0 12 =================================================================================*/
michael@0 13
michael@0 14 typedef int (*uGeneratorFunc) (
michael@0 15 int32_t* state,
michael@0 16 uint16_t in,
michael@0 17 unsigned char* out,
michael@0 18 uint32_t outbuflen,
michael@0 19 uint32_t* outlen
michael@0 20 );
michael@0 21
michael@0 22 int uGenerate(
michael@0 23 uScanClassID scanClass,
michael@0 24 int32_t* state,
michael@0 25 uint16_t in,
michael@0 26 unsigned char* out,
michael@0 27 uint32_t outbuflen,
michael@0 28 uint32_t* outlen
michael@0 29 );
michael@0 30
michael@0 31 #define uSubGenerator(sub,in,out) (* m_subgenerator[sub])((in),(out))
michael@0 32
michael@0 33 int uCheckAndGenAlways1Byte(
michael@0 34 int32_t* state,
michael@0 35 uint16_t in,
michael@0 36 unsigned char* out,
michael@0 37 uint32_t outbuflen,
michael@0 38 uint32_t* outlen
michael@0 39 );
michael@0 40 int uCheckAndGenAlways2Byte(
michael@0 41 int32_t* state,
michael@0 42 uint16_t in,
michael@0 43 unsigned char* out,
michael@0 44 uint32_t outbuflen,
michael@0 45 uint32_t* outlen
michael@0 46 );
michael@0 47 int uCheckAndGenAlways2ByteShiftGR(
michael@0 48 int32_t* state,
michael@0 49 uint16_t in,
michael@0 50 unsigned char* out,
michael@0 51 uint32_t outbuflen,
michael@0 52 uint32_t* outlen
michael@0 53 );
michael@0 54 int uGenerateShift(
michael@0 55 uShiftOutTable *shift,
michael@0 56 int32_t* state,
michael@0 57 uint16_t in,
michael@0 58 unsigned char* out,
michael@0 59 uint32_t outbuflen,
michael@0 60 uint32_t* outlen
michael@0 61 );
michael@0 62 int uCheckAndGen2ByteGRPrefix8F(
michael@0 63 int32_t* state,
michael@0 64 uint16_t in,
michael@0 65 unsigned char* out,
michael@0 66 uint32_t outbuflen,
michael@0 67 uint32_t* outlen
michael@0 68 );
michael@0 69 int uCheckAndGen2ByteGRPrefix8EA2(
michael@0 70 int32_t* state,
michael@0 71 uint16_t in,
michael@0 72 unsigned char* out,
michael@0 73 uint32_t outbuflen,
michael@0 74 uint32_t* outlen
michael@0 75 );
michael@0 76
michael@0 77 int uCheckAndGen2ByteGRPrefix8EA3(
michael@0 78 int32_t* state,
michael@0 79 uint16_t in,
michael@0 80 unsigned char* out,
michael@0 81 uint32_t outbuflen,
michael@0 82 uint32_t* outlen
michael@0 83 );
michael@0 84
michael@0 85 int uCheckAndGen2ByteGRPrefix8EA4(
michael@0 86 int32_t* state,
michael@0 87 uint16_t in,
michael@0 88 unsigned char* out,
michael@0 89 uint32_t outbuflen,
michael@0 90 uint32_t* outlen
michael@0 91 );
michael@0 92
michael@0 93 int uCheckAndGen2ByteGRPrefix8EA5(
michael@0 94 int32_t* state,
michael@0 95 uint16_t in,
michael@0 96 unsigned char* out,
michael@0 97 uint32_t outbuflen,
michael@0 98 uint32_t* outlen
michael@0 99 );
michael@0 100
michael@0 101 int uCheckAndGen2ByteGRPrefix8EA6(
michael@0 102 int32_t* state,
michael@0 103 uint16_t in,
michael@0 104 unsigned char* out,
michael@0 105 uint32_t outbuflen,
michael@0 106 uint32_t* outlen
michael@0 107 );
michael@0 108
michael@0 109 int uCheckAndGen2ByteGRPrefix8EA7(
michael@0 110 int32_t* state,
michael@0 111 uint16_t in,
michael@0 112 unsigned char* out,
michael@0 113 uint32_t outbuflen,
michael@0 114 uint32_t* outlen
michael@0 115 );
michael@0 116 int uCnGAlways8BytesDecomposedHangul(
michael@0 117 int32_t* state,
michael@0 118 uint16_t in,
michael@0 119 unsigned char* out,
michael@0 120 uint32_t outbuflen,
michael@0 121 uint32_t* outlen
michael@0 122 );
michael@0 123
michael@0 124 int uCheckAndGenJohabHangul(
michael@0 125 int32_t* state,
michael@0 126 uint16_t in,
michael@0 127 unsigned char* out,
michael@0 128 uint32_t outbuflen,
michael@0 129 uint32_t* outlen
michael@0 130 );
michael@0 131
michael@0 132 int uCheckAndGenJohabSymbol(
michael@0 133 int32_t* state,
michael@0 134 uint16_t in,
michael@0 135 unsigned char* out,
michael@0 136 uint32_t outbuflen,
michael@0 137 uint32_t* outlen
michael@0 138 );
michael@0 139
michael@0 140
michael@0 141 int uCheckAndGen4BytesGB18030(
michael@0 142 int32_t* state,
michael@0 143 uint16_t in,
michael@0 144 unsigned char* out,
michael@0 145 uint32_t outbuflen,
michael@0 146 uint32_t* outlen
michael@0 147 );
michael@0 148
michael@0 149 int uGenAlways2Byte(
michael@0 150 uint16_t in,
michael@0 151 unsigned char* out
michael@0 152 );
michael@0 153 int uGenAlways2ByteShiftGR(
michael@0 154 uint16_t in,
michael@0 155 unsigned char* out
michael@0 156 );
michael@0 157 int uGenAlways1Byte(
michael@0 158 uint16_t in,
michael@0 159 unsigned char* out
michael@0 160 );
michael@0 161 int uGenAlways1BytePrefix8E(
michael@0 162 uint16_t in,
michael@0 163 unsigned char* out
michael@0 164 );
michael@0 165 /*=================================================================================
michael@0 166
michael@0 167 =================================================================================*/
michael@0 168 const uGeneratorFunc m_generator[uNumOfCharsetType] =
michael@0 169 {
michael@0 170 uCheckAndGenAlways1Byte,
michael@0 171 uCheckAndGenAlways2Byte,
michael@0 172 uCheckAndGenAlways2ByteShiftGR,
michael@0 173 uCheckAndGen2ByteGRPrefix8F,
michael@0 174 uCheckAndGen2ByteGRPrefix8EA2,
michael@0 175 uCheckAndGen2ByteGRPrefix8EA3,
michael@0 176 uCheckAndGen2ByteGRPrefix8EA4,
michael@0 177 uCheckAndGen2ByteGRPrefix8EA5,
michael@0 178 uCheckAndGen2ByteGRPrefix8EA6,
michael@0 179 uCheckAndGen2ByteGRPrefix8EA7,
michael@0 180 uCnGAlways8BytesDecomposedHangul,
michael@0 181 uCheckAndGenJohabHangul,
michael@0 182 uCheckAndGenJohabSymbol,
michael@0 183 uCheckAndGen4BytesGB18030,
michael@0 184 uCheckAndGenAlways2Byte /* place-holder for GR128 */
michael@0 185 };
michael@0 186
michael@0 187 /*=================================================================================
michael@0 188
michael@0 189 =================================================================================*/
michael@0 190
michael@0 191 const uSubGeneratorFunc m_subgenerator[uNumOfCharType] =
michael@0 192 {
michael@0 193 uGenAlways1Byte,
michael@0 194 uGenAlways2Byte,
michael@0 195 uGenAlways2ByteShiftGR,
michael@0 196 uGenAlways1BytePrefix8E
michael@0 197 };
michael@0 198 /*=================================================================================
michael@0 199
michael@0 200 =================================================================================*/
michael@0 201 int uGenerate(
michael@0 202 uScanClassID scanClass,
michael@0 203 int32_t* state,
michael@0 204 uint16_t in,
michael@0 205 unsigned char* out,
michael@0 206 uint32_t outbuflen,
michael@0 207 uint32_t* outlen
michael@0 208 )
michael@0 209 {
michael@0 210 return (* m_generator[scanClass]) (state,in,out,outbuflen,outlen);
michael@0 211 }
michael@0 212 /*=================================================================================
michael@0 213
michael@0 214 =================================================================================*/
/*
 * Store the low 8 bits of `in` as a single byte in out[0].
 * No buffer-size check is done here.  Always returns 1.
 */
int uGenAlways1Byte(
                    uint16_t        in,
                    unsigned char*  out
                    )
{
  *out = (unsigned char)(in & 0xff);
  return 1;
}
michael@0 223
michael@0 224 /*=================================================================================
michael@0 225
michael@0 226 =================================================================================*/
/*
 * Store `in` as two bytes, high byte first: out[0] = bits 15..8,
 * out[1] = bits 7..0.  No buffer-size check is done here.  Always returns 1.
 */
int uGenAlways2Byte(
                    uint16_t        in,
                    unsigned char*  out
                    )
{
  const unsigned int high = (in >> 8) & 0xff;
  const unsigned int low  = in & 0xff;

  out[0] = (unsigned char)high;
  out[1] = (unsigned char)low;
  return 1;
}
michael@0 236 /*=================================================================================
michael@0 237
michael@0 238 =================================================================================*/
/*
 * Store `in` as two bytes, high byte first, with bit 0x80 set in both.
 * No buffer-size check is done here.  Always returns 1.
 */
int uGenAlways2ByteShiftGR(
                           uint16_t        in,
                           unsigned char*  out
                           )
{
  const unsigned int high = (in >> 8) & 0xff;
  const unsigned int low  = in & 0xff;

  out[0] = (unsigned char)(high | 0x80);
  out[1] = (unsigned char)(low | 0x80);
  return 1;
}
michael@0 248 /*=================================================================================
michael@0 249
michael@0 250 =================================================================================*/
/*
 * Emit the fixed byte 0x8E followed by the low 8 bits of `in`.
 * The high byte of `in` is ignored.  No buffer-size check is done here.
 * Always returns 1.
 */
int uGenAlways1BytePrefix8E(
                            uint16_t        in,
                            unsigned char*  out
                            )
{
  const unsigned char low = (unsigned char)(in & 0xff);

  out[0] = 0x8E;
  out[1] = low;
  return 1;
}
michael@0 260 /*=================================================================================
michael@0 261
michael@0 262 =================================================================================*/
/*
 * Emit the low 8 bits of `in` as one byte.
 *
 * Returns 0 without touching `out` or `outlen` when the buffer cannot hold
 * one byte; otherwise sets *outlen to 1 and returns 1.  `state` is not used.
 */
int uCheckAndGenAlways1Byte(
                            int32_t*        state,
                            uint16_t        in,
                            unsigned char*  out,
                            uint32_t        outbuflen,
                            uint32_t*       outlen
                            )
{
  (void)state;

  /* The buffer size is checked on every call rather than trusting the caller. */
  if(outbuflen == 0)
    return 0;

  out[0] = in & 0xff;
  *outlen = 1;
  return 1;
}
michael@0 282
michael@0 283 /*=================================================================================
michael@0 284
michael@0 285 =================================================================================*/
/*
 * Emit `in` as two bytes, high byte first.
 *
 * Returns 0 without touching `out` or `outlen` when fewer than two bytes are
 * available; otherwise sets *outlen to 2 and returns 1.  `state` is not used.
 */
int uCheckAndGenAlways2Byte(
                            int32_t*        state,
                            uint16_t        in,
                            unsigned char*  out,
                            uint32_t        outbuflen,
                            uint32_t*       outlen
                            )
{
  (void)state;

  if(outbuflen < 2)
    return 0;

  out[0] = ((in >> 8) & 0xff);
  out[1] = in & 0xff;
  *outlen = 2;
  return 1;
}
michael@0 304 /*=================================================================================
michael@0 305
michael@0 306 =================================================================================*/
/*
 * Emit `in` as two bytes, high byte first, each with bit 0x80 set.
 *
 * Returns 0 without touching `out` or `outlen` when fewer than two bytes are
 * available; otherwise sets *outlen to 2 and returns 1.  `state` is not used.
 */
int uCheckAndGenAlways2ByteShiftGR(
                                   int32_t*        state,
                                   uint16_t        in,
                                   unsigned char*  out,
                                   uint32_t        outbuflen,
                                   uint32_t*       outlen
                                   )
{
  (void)state;

  if(outbuflen < 2)
    return 0;

  out[0] = ((in >> 8) & 0xff) | 0x80;
  out[1] = (in & 0xff) | 0x80;
  *outlen = 2;
  return 1;
}
michael@0 325 /*=================================================================================
michael@0 326
michael@0 327 =================================================================================*/
michael@0 328 int uGenerateShift(
michael@0 329 uShiftOutTable *shift,
michael@0 330 int32_t* state,
michael@0 331 uint16_t in,
michael@0 332 unsigned char* out,
michael@0 333 uint32_t outbuflen,
michael@0 334 uint32_t* outlen
michael@0 335 )
michael@0 336 {
michael@0 337 int16_t i;
michael@0 338 const uShiftOutCell* cell = &(shift->shiftcell[0]);
michael@0 339 int16_t itemnum = shift->numOfItem;
michael@0 340 unsigned char inH, inL;
michael@0 341 inH = (in >> 8) & 0xff;
michael@0 342 inL = (in & 0xff );
michael@0 343 for(i=0;i<itemnum;i++)
michael@0 344 {
michael@0 345 if( ( inL >= cell[i].shiftout_MinLB) &&
michael@0 346 ( inL <= cell[i].shiftout_MaxLB) &&
michael@0 347 ( inH >= cell[i].shiftout_MinHB) &&
michael@0 348 ( inH <= cell[i].shiftout_MaxHB) )
michael@0 349 {
michael@0 350 if(outbuflen < cell[i].reserveLen)
michael@0 351 {
michael@0 352 return 0;
michael@0 353 }
michael@0 354 else
michael@0 355 {
michael@0 356 *outlen = cell[i].reserveLen;
michael@0 357 return (uSubGenerator(cell[i].classID,in,out));
michael@0 358 }
michael@0 359 }
michael@0 360 }
michael@0 361 return 0;
michael@0 362 }
michael@0 363 /*=================================================================================
michael@0 364
michael@0 365 =================================================================================*/
/*
 * Emit three bytes: the fixed prefix 0x8F, then the high and low bytes of
 * `in`, each with bit 0x80 set.
 *
 * Returns 0 without touching `out` or `outlen` when fewer than three bytes
 * are available; otherwise sets *outlen to 3 and returns 1.  `state` is not
 * used.
 */
int uCheckAndGen2ByteGRPrefix8F(
                                int32_t*        state,
                                uint16_t        in,
                                unsigned char*  out,
                                uint32_t        outbuflen,
                                uint32_t*       outlen
                                )
{
  (void)state;

  if(outbuflen < 3)
    return 0;

  out[0] = 0x8F;
  out[1] = ((in >> 8) & 0xff) | 0x80;
  out[2] = (in & 0xff) | 0x80;
  *outlen = 3;
  return 1;
}
michael@0 384 /*=================================================================================
michael@0 385
michael@0 386 =================================================================================*/
/*
 * Shared worker for uCheckAndGen2ByteGRPrefix8EA2 .. 8EA7.
 *
 * Emits four bytes: 0x8E, `second`, then the high and low bytes of `in`, each
 * with bit 0x80 set.  Returns 0 without touching `out` or `outlen` when fewer
 * than four bytes are available; otherwise sets *outlen to 4 and returns 1.
 */
static int uGen2ByteGRPrefix8EWithSecond(
                                         unsigned char   second,
                                         uint16_t        in,
                                         unsigned char*  out,
                                         uint32_t        outbuflen,
                                         uint32_t*       outlen
                                         )
{
  if(outbuflen < 4)
    return 0;

  *outlen = 4;
  out[0] = 0x8E;
  out[1] = second;
  out[2] = ((in >> 8) & 0xff) | 0x80;
  out[3] = (in & 0xff) | 0x80;
  return 1;
}

/*=================================================================================
  The six generators below differ only in the second prefix byte (0xA2..0xA7).
  Each returns 1 and sets *outlen to 4 on success, or returns 0 when the
  buffer is shorter than four bytes.  `state` is not used by any of them.
=================================================================================*/

/* Emits 0x8E 0xA2 followed by the two bytes of `in` with bit 0x80 set. */
int uCheckAndGen2ByteGRPrefix8EA2(int32_t*        state,
                                  uint16_t        in,
                                  unsigned char*  out,
                                  uint32_t        outbuflen,
                                  uint32_t*       outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8EWithSecond(0xA2, in, out, outbuflen, outlen);
}

/* Emits 0x8E 0xA3 followed by the two bytes of `in` with bit 0x80 set. */
int uCheckAndGen2ByteGRPrefix8EA3(int32_t*        state,
                                  uint16_t        in,
                                  unsigned char*  out,
                                  uint32_t        outbuflen,
                                  uint32_t*       outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8EWithSecond(0xA3, in, out, outbuflen, outlen);
}

/* Emits 0x8E 0xA4 followed by the two bytes of `in` with bit 0x80 set. */
int uCheckAndGen2ByteGRPrefix8EA4(int32_t*        state,
                                  uint16_t        in,
                                  unsigned char*  out,
                                  uint32_t        outbuflen,
                                  uint32_t*       outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8EWithSecond(0xA4, in, out, outbuflen, outlen);
}

/* Emits 0x8E 0xA5 followed by the two bytes of `in` with bit 0x80 set. */
int uCheckAndGen2ByteGRPrefix8EA5(int32_t*        state,
                                  uint16_t        in,
                                  unsigned char*  out,
                                  uint32_t        outbuflen,
                                  uint32_t*       outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8EWithSecond(0xA5, in, out, outbuflen, outlen);
}

/* Emits 0x8E 0xA6 followed by the two bytes of `in` with bit 0x80 set. */
int uCheckAndGen2ByteGRPrefix8EA6(int32_t*        state,
                                  uint16_t        in,
                                  unsigned char*  out,
                                  uint32_t        outbuflen,
                                  uint32_t*       outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8EWithSecond(0xA6, in, out, outbuflen, outlen);
}

/* Emits 0x8E 0xA7 followed by the two bytes of `in` with bit 0x80 set. */
int uCheckAndGen2ByteGRPrefix8EA7(int32_t*        state,
                                  uint16_t        in,
                                  unsigned char*  out,
                                  uint32_t        outbuflen,
                                  uint32_t*       outlen
                                  )
{
  (void)state;
  return uGen2ByteGRPrefix8EWithSecond(0xA7, in, out, outbuflen, outlen);
}
michael@0 518 /*=================================================================================
michael@0 519
michael@0 520 =================================================================================*/
/* Constants of the Hangul syllable decomposition arithmetic. */
#define SBase 0xAC00
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)
/*=================================================================================
  Emit a precomposed Hangul syllable as an 8-byte decomposed sequence.

    state      not used
    in         16-bit value of the syllable; must lie in
               [SBase, SBase + LCount * NCount)
    out        output buffer
    outbuflen  capacity of `out` in bytes
    outlen     receives 8 on success

  Returns 1 on success.  Returns 0, leaving `out` and `outlen` untouched, when
  the buffer holds fewer than 8 bytes or when `in` is outside the syllable
  range.
=================================================================================*/
int uCnGAlways8BytesDecomposedHangul(
                                     int32_t*        state,
                                     uint16_t        in,
                                     unsigned char*  out,
                                     uint32_t        outbuflen,
                                     uint32_t*       outlen
                                     )
{
  /* Second byte of the leading-consonant jamo, indexed by LIndex. */
  static const uint8_t lMap[LCount] = {
    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
    0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
  };

  /* Second byte of the trailing-consonant jamo, indexed by TIndex. */
  static const uint8_t tMap[TCount] = {
    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
    0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
    0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
  };

  uint16_t SIndex, LIndex, VIndex, TIndex;

  (void)state;

  if(outbuflen < 8)
    return 0;

  /*
   * Reject anything that is not a precomposed syllable.  Without this check
   * LIndex can exceed LCount - 1 and lMap[] would be read out of bounds.
   */
  if((in < SBase) || ((in - SBase) >= (LCount * NCount)))
    return 0;

  /* the following line is copied from Unicode 2.0 page 3-13 */
  /* item 1 of Hangul Syllable Decomposition */
  SIndex = in - SBase;

  /* the following lines are copied from Unicode 2.0 page 3-14 */
  /* item 2 of Hangul Syllable Decomposition w/ modification */
  LIndex = SIndex / NCount;
  VIndex = (SIndex % NCount) / TCount;
  TIndex = SIndex % TCount;

  /*
   * A Hangul syllable not enumerated in KS X 1001 is represented
   * by a sequence of 8 bytes beginning with Hangul-filler
   * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three
   * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making
   * up the syllable. ref. KS X 1001:1998 Annex 3
   */
  *outlen = 8;
  out[0] = out[2] = out[4] = out[6] = 0xa4;
  out[1] = 0xd4;
  out[3] = lMap[LIndex];
  out[5] = (VIndex + 0xbf);
  out[7] = tMap[TIndex];

  return 1;
}
michael@0 579
michael@0 580 int uCheckAndGenJohabHangul(
michael@0 581 int32_t* state,
michael@0 582 uint16_t in,
michael@0 583 unsigned char* out,
michael@0 584 uint32_t outbuflen,
michael@0 585 uint32_t* outlen
michael@0 586 )
michael@0 587 {
michael@0 588 if(outbuflen < 2)
michael@0 589 return 0;
michael@0 590 else
michael@0 591 {
michael@0 592 /*
michael@0 593 See Table 4-45 (page 183) of CJKV Information Processing
michael@0 594 for detail explanation of the following table.
michael@0 595 */
michael@0 596 /*
michael@0 597 static const uint8_t lMap[LCount] = {
michael@0 598 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
michael@0 599 };
michael@0 600 Therefore lMap[i] == i+2;
michael@0 601 */
michael@0 602
michael@0 603 static const uint8_t vMap[VCount] = {
michael@0 604 /* no 0,1,2 */
michael@0 605 3,4,5,6,7, /* no 8,9 */
michael@0 606 10,11,12,13,14,15, /* no 16,17 */
michael@0 607 18,19,20,21,22,23, /* no 24,25 */
michael@0 608 26,27,28,29
michael@0 609 };
michael@0 610 static const uint8_t tMap[TCount] = {
michael@0 611 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */
michael@0 612 19,20,21,22,23,24,25,26,27,28,29
michael@0 613 };
michael@0 614 uint16_t SIndex, LIndex, VIndex, TIndex, ch;
michael@0 615 /* the following line are copy from Unicode 2.0 page 3-13 */
michael@0 616 /* item 1 of Hangul Syllabel Decomposition */
michael@0 617 SIndex = in - SBase;
michael@0 618
michael@0 619 /* the following lines are copy from Unicode 2.0 page 3-14 */
michael@0 620 /* item 2 of Hangul Syllabel Decomposition w/ modification */
michael@0 621 LIndex = SIndex / NCount;
michael@0 622 VIndex = (SIndex % NCount) / TCount;
michael@0 623 TIndex = SIndex % TCount;
michael@0 624
michael@0 625 *outlen = 2;
michael@0 626 ch = 0x8000 |
michael@0 627 ((LIndex+2)<<10) |
michael@0 628 (vMap[VIndex]<<5)|
michael@0 629 tMap[TIndex];
michael@0 630 out[0] = (ch >> 8);
michael@0 631 out[1] = ch & 0x00FF;
michael@0 632 #if 0
michael@0 633 printf("Johab Hangul %x %x in=%x L=%d V=%d T=%d\n", out[0], out[1], in, LIndex, VIndex, TIndex);
michael@0 634 #endif
michael@0 635 return 1;
michael@0 636 }
michael@0 637 }
/*
 * Convert a two-byte code held in `in` to a two-byte Johab symbol code.
 *
 * Only the low 7 bits of each byte of `in` are used.  Returns 0 without
 * touching `out` or `outlen` when fewer than two bytes are available;
 * otherwise sets *outlen to 2 and returns 1.  `state` is not used.
 *
 * The arithmetic follows the Perl code listed under
 * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013)
 * in the book "CJKV Information Processing" by
 * Ken Lunde <lunde@adobe.com>:
 *
 * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
 *   my @euc = unpack("C*", $_[0]);
 *   my ($fe_off, $hi_off, $lo_off) = (0,0,1);
 *   my @out = ();
 *   while(($hi, $lo) = splice(@euc, 0, 2)) {
 *     $hi &= 127; $lo &= 127;
 *     $fe_off = 21 if $hi == 73;
 *     $fe_off = 34 if $hi == 126;
 *     ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
 *     push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
 *          $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128));
 *   }
 *   return pack("C*", @out);
 */
int uCheckAndGenJohabSymbol(
                            int32_t*        state,
                            uint16_t        in,
                            unsigned char*  out,
                            uint32_t        outbuflen,
                            uint32_t*       outlen
                            )
{
  unsigned int high, low;
  unsigned int leadAdjust, highOffset, lowOffset;
  unsigned int leadBase, trailAdd;

  (void)state;

  if(outbuflen < 2)
    return 0;

  high = (in >> 8) & 0x7F;
  low  = in & 0x7F;

  /* Two specific high bytes need an extra downward adjustment of the lead. */
  if(high == 126)
    leadAdjust = 34;
  else if(high == 73)
    leadAdjust = 21;
  else
    leadAdjust = 0;

  /* The two offsets are swapped for high bytes outside 74..125. */
  if((high >= 74) && (high <= 125))
  {
    highOffset = 0;
    lowOffset  = 1;
  }
  else
  {
    highOffset = 1;
    lowOffset  = 0;
  }

  leadBase = (high < 74) ? 200 : 187;

  if((high + lowOffset) & 1)
    trailAdd = (low > 110) ? 34 : 16;
  else
    trailAdd = 128;

  *outlen = 2;
  out[0] = (unsigned char)(((high + highOffset) >> 1) + leadBase - leadAdjust);
  out[1] = (unsigned char)(low + trailAdd);
  return 1;
}
/*
 * Emit `in` as four bytes using mixed-radix digits:
 *
 *   out[0] = 0x81 + in / (10*126*10)
 *   out[1] = 0x30 + (in % (10*126*10)) / (10*126)
 *   out[2] = 0x81 + (in % (10*126)) / 10
 *   out[3] = 0x30 + in % 10
 *
 * Returns 0 without touching `out` or `outlen` when fewer than four bytes are
 * available; otherwise sets *outlen to 4 and returns 1.  `state` is not used.
 */
int uCheckAndGen4BytesGB18030(
                              int32_t*        state,
                              uint16_t        in,
                              unsigned char*  out,
                              uint32_t        outbuflen,
                              uint32_t*       outlen
                              )
{
  unsigned int rest;

  (void)state;

  if(outbuflen < 4)
    return 0;

  rest = in;
  out[0] = (unsigned char)((rest / (10*126*10)) + 0x81);
  rest %= (10*126*10);
  out[1] = (unsigned char)((rest / (10*126)) + 0x30);
  rest %= (10*126);
  out[2] = (unsigned char)((rest / 10) + 0x81);
  out[3] = (unsigned char)((rest % 10) + 0x30);

  *outlen = 4;
  return 1;
}

mercurial