|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 #include "unicpriv.h" |
|
/*
 * CHK_GR94(b): non-zero when b, truncated to 8 bits, lies strictly between
 * 0xA0 and 0xFF (i.e. 0xA1..0xFE).  Note that b is evaluated twice.
 */
#define CHK_GR94(b) ( (uint8_t) (b) > (uint8_t) 0xa0 && (uint8_t) (b) < (uint8_t) 0xff )
/* CHK_GR94_2Byte(b1,b2): non-zero when both bytes pass CHK_GR94. */
#define CHK_GR94_2Byte(b1,b2) (CHK_GR94(b1) && CHK_GR94(b2))
|
8 /*================================================================================= |
|
9 |
|
10 =================================================================================*/ |
|
/*
 * Signature of the entries stored in m_subscanner: takes the input bytes
 * and a location for one 16-bit result, and returns an int status.
 */
typedef int (*uSubScannerFunc)(unsigned char *in, uint16_t *out);
|
12 /*================================================================================= |
|
13 |
|
14 =================================================================================*/ |
|
15 |
|
/*
 * Signature of the entries stored in m_scanner.
 *   state     - scanner state pointer (not read by the scanners in this file)
 *   in        - input bytes
 *   out       - receives one 16-bit scanned value
 *   inbuflen  - number of bytes available at in
 *   inscanlen - receives the number of input bytes consumed
 * The int result is non-zero when a value was scanned and 0 otherwise.
 */
typedef int (*uScannerFunc)(int32_t *state,
                            unsigned char *in,
                            uint16_t *out,
                            uint32_t inbuflen,
                            uint32_t *inscanlen);
|
23 |
|
24 int uScan( |
|
25 uScanClassID scanClass, |
|
26 int32_t* state, |
|
27 unsigned char *in, |
|
28 uint16_t *out, |
|
29 uint32_t inbuflen, |
|
30 uint32_t* inscanlen |
|
31 ); |
|
32 |
|
/* Invoke entry `sub` of the m_subscanner table on (in, out). */
#define uSubScanner(sub,in,out) (m_subscanner[sub]((in),(out)))
|
34 |
|
35 int uCheckAndScanAlways1Byte( |
|
36 int32_t* state, |
|
37 unsigned char *in, |
|
38 uint16_t *out, |
|
39 uint32_t inbuflen, |
|
40 uint32_t* inscanlen |
|
41 ); |
|
42 int uCheckAndScanAlways2Byte( |
|
43 int32_t* state, |
|
44 unsigned char *in, |
|
45 uint16_t *out, |
|
46 uint32_t inbuflen, |
|
47 uint32_t* inscanlen |
|
48 ); |
|
49 int uCheckAndScanAlways2ByteShiftGR( |
|
50 int32_t* state, |
|
51 unsigned char *in, |
|
52 uint16_t *out, |
|
53 uint32_t inbuflen, |
|
54 uint32_t* inscanlen |
|
55 ); |
|
56 int uCheckAndScanAlways2ByteGR128( |
|
57 int32_t* state, |
|
58 unsigned char *in, |
|
59 uint16_t *out, |
|
60 uint32_t inbuflen, |
|
61 uint32_t* inscanlen |
|
62 ); |
|
63 int uScanShift( |
|
64 uShiftInTable *shift, |
|
65 int32_t* state, |
|
66 unsigned char *in, |
|
67 uint16_t *out, |
|
68 uint32_t inbuflen, |
|
69 uint32_t* inscanlen |
|
70 ); |
|
71 |
|
72 int uCheckAndScan2ByteGRPrefix8F( |
|
73 int32_t* state, |
|
74 unsigned char *in, |
|
75 uint16_t *out, |
|
76 uint32_t inbuflen, |
|
77 uint32_t* inscanlen |
|
78 ); |
|
79 int uCheckAndScan2ByteGRPrefix8EA2( |
|
80 int32_t* state, |
|
81 unsigned char *in, |
|
82 uint16_t *out, |
|
83 uint32_t inbuflen, |
|
84 uint32_t* inscanlen |
|
85 ); |
|
86 int uCheckAndScan2ByteGRPrefix8EA3( |
|
87 int32_t* state, |
|
88 unsigned char *in, |
|
89 uint16_t *out, |
|
90 uint32_t inbuflen, |
|
91 uint32_t* inscanlen |
|
92 ); |
|
93 int uCheckAndScan2ByteGRPrefix8EA4( |
|
94 int32_t* state, |
|
95 unsigned char *in, |
|
96 uint16_t *out, |
|
97 uint32_t inbuflen, |
|
98 uint32_t* inscanlen |
|
99 ); |
|
100 int uCheckAndScan2ByteGRPrefix8EA5( |
|
101 int32_t* state, |
|
102 unsigned char *in, |
|
103 uint16_t *out, |
|
104 uint32_t inbuflen, |
|
105 uint32_t* inscanlen |
|
106 ); |
|
107 int uCheckAndScan2ByteGRPrefix8EA6( |
|
108 int32_t* state, |
|
109 unsigned char *in, |
|
110 uint16_t *out, |
|
111 uint32_t inbuflen, |
|
112 uint32_t* inscanlen |
|
113 ); |
|
114 int uCheckAndScan2ByteGRPrefix8EA7( |
|
115 int32_t* state, |
|
116 unsigned char *in, |
|
117 uint16_t *out, |
|
118 uint32_t inbuflen, |
|
119 uint32_t* inscanlen |
|
120 ); |
|
121 int uCnSAlways8BytesDecomposedHangul( |
|
122 int32_t* state, |
|
123 unsigned char *in, |
|
124 uint16_t *out, |
|
125 uint32_t inbuflen, |
|
126 uint32_t* inscanlen |
|
127 ); |
|
128 int uCheckAndScanJohabHangul( |
|
129 int32_t* state, |
|
130 unsigned char *in, |
|
131 uint16_t *out, |
|
132 uint32_t inbuflen, |
|
133 uint32_t* inscanlen |
|
134 ); |
|
135 int uCheckAndScanJohabSymbol( |
|
136 int32_t* state, |
|
137 unsigned char *in, |
|
138 uint16_t *out, |
|
139 uint32_t inbuflen, |
|
140 uint32_t* inscanlen |
|
141 ); |
|
142 |
|
143 int uCheckAndScan4BytesGB18030( |
|
144 int32_t* state, |
|
145 unsigned char *in, |
|
146 uint16_t *out, |
|
147 uint32_t inbuflen, |
|
148 uint32_t* inscanlen |
|
149 ); |
|
150 |
|
151 int uScanAlways2Byte( |
|
152 unsigned char* in, |
|
153 uint16_t* out |
|
154 ); |
|
155 int uScanAlways2ByteShiftGR( |
|
156 unsigned char* in, |
|
157 uint16_t* out |
|
158 ); |
|
159 int uScanAlways1Byte( |
|
160 unsigned char* in, |
|
161 uint16_t* out |
|
162 ); |
|
163 int uScanAlways1BytePrefix8E( |
|
164 unsigned char* in, |
|
165 uint16_t* out |
|
166 ); |
|
167 /*================================================================================= |
|
168 |
|
169 =================================================================================*/ |
|
170 const uScannerFunc m_scanner[uNumOfCharsetType] = |
|
171 { |
|
172 uCheckAndScanAlways1Byte, |
|
173 uCheckAndScanAlways2Byte, |
|
174 uCheckAndScanAlways2ByteShiftGR, |
|
175 uCheckAndScan2ByteGRPrefix8F, |
|
176 uCheckAndScan2ByteGRPrefix8EA2, |
|
177 uCheckAndScan2ByteGRPrefix8EA3, |
|
178 uCheckAndScan2ByteGRPrefix8EA4, |
|
179 uCheckAndScan2ByteGRPrefix8EA5, |
|
180 uCheckAndScan2ByteGRPrefix8EA6, |
|
181 uCheckAndScan2ByteGRPrefix8EA7, |
|
182 uCnSAlways8BytesDecomposedHangul, |
|
183 uCheckAndScanJohabHangul, |
|
184 uCheckAndScanJohabSymbol, |
|
185 uCheckAndScan4BytesGB18030, |
|
186 uCheckAndScanAlways2ByteGR128 |
|
187 }; |
|
188 |
|
189 /*================================================================================= |
|
190 |
|
191 =================================================================================*/ |
|
192 |
|
193 const uSubScannerFunc m_subscanner[uNumOfCharType] = |
|
194 { |
|
195 uScanAlways1Byte, |
|
196 uScanAlways2Byte, |
|
197 uScanAlways2ByteShiftGR, |
|
198 uScanAlways1BytePrefix8E |
|
199 }; |
|
200 /*================================================================================= |
|
201 |
|
202 =================================================================================*/ |
|
203 int uScan( |
|
204 uScanClassID scanClass, |
|
205 int32_t* state, |
|
206 unsigned char *in, |
|
207 uint16_t *out, |
|
208 uint32_t inbuflen, |
|
209 uint32_t* inscanlen |
|
210 ) |
|
211 { |
|
212 return (* m_scanner[scanClass]) (state,in,out,inbuflen,inscanlen); |
|
213 } |
|
214 /*================================================================================= |
|
215 |
|
216 =================================================================================*/ |
|
/*
 * Store the first input byte, zero-extended to 16 bits, in *out.
 * Always returns 1.
 */
int uScanAlways1Byte(unsigned char *in, uint16_t *out)
{
    const unsigned char first = in[0];

    *out = (uint16_t) first;
    return 1;
}
|
225 |
|
226 /*================================================================================= |
|
227 |
|
228 =================================================================================*/ |
|
/*
 * Combine the first two input bytes into one 16-bit value, in[0] being the
 * high byte and in[1] the low byte.  Always returns 1.
 */
int uScanAlways2Byte(unsigned char *in, uint16_t *out)
{
    const unsigned int high = in[0];
    const unsigned int low = in[1];

    *out = (uint16_t) ((high << 8) | low);
    return 1;
}
|
237 /*================================================================================= |
|
238 |
|
239 =================================================================================*/ |
|
/*
 * Combine the first two input bytes (in[0] high, in[1] low) and clear the
 * top bit of each byte by masking with 0x7F7F.  Always returns 1.
 */
int uScanAlways2ByteShiftGR(unsigned char *in, uint16_t *out)
{
    const unsigned int high = in[0];
    const unsigned int low = in[1];
    const unsigned int pair = (high << 8) | low;

    *out = (uint16_t) (pair & 0x7F7F);
    return 1;
}
|
248 |
|
249 /*================================================================================= |
|
250 |
|
251 =================================================================================*/ |
|
/*
 * Store the second input byte, zero-extended to 16 bits, in *out.
 * in[0] (the prefix byte) is skipped without being examined.
 * Always returns 1.
 */
int uScanAlways1BytePrefix8E(unsigned char *in, uint16_t *out)
{
    const unsigned char payload = in[1];

    *out = (uint16_t) payload;
    return 1;
}
|
260 /*================================================================================= |
|
261 |
|
262 =================================================================================*/ |
|
/*
 * Single-byte scanner: consumes exactly one byte and stores it,
 * zero-extended, in *out.
 *
 * state and inbuflen are not used; the buffer length is deliberately not
 * checked, so the caller has to make sure at least one byte is available.
 * Always returns 1.
 */
int uCheckAndScanAlways1Byte(int32_t *state,
                             unsigned char *in,
                             uint16_t *out,
                             uint32_t inbuflen,
                             uint32_t *inscanlen)
{
    const unsigned char first = in[0];

    *inscanlen = 1;
    *out = (uint16_t) first;
    return 1;
}
|
277 |
|
278 /*================================================================================= |
|
279 |
|
280 =================================================================================*/ |
|
/*
 * Two-byte scanner without any range check.
 *
 * Returns 0 (leaving *out and *inscanlen untouched) when fewer than two
 * bytes are available.  Otherwise stores (in[0] << 8) | in[1] in *out,
 * sets *inscanlen to 2 and returns 1.  state is not used.
 */
int uCheckAndScanAlways2Byte(int32_t *state,
                             unsigned char *in,
                             uint16_t *out,
                             uint32_t inbuflen,
                             uint32_t *inscanlen)
{
    if (inbuflen < 2)
        return 0;

    *inscanlen = 2;
    *out = (uint16_t) ((in[0] << 8) | in[1]);
    return 1;
}
|
298 /*================================================================================= |
|
299 |
|
300 =================================================================================*/ |
|
/*
 * Two-byte scanner for 94x94 character sets invoked on GR.
 *
 * Both bytes should be in the range [0xa1,0xfe]; no encoding implemented in
 * Mozilla uses 96x96 character sets.  Only the second byte is checked here
 * because the first byte is checked before this function is called, in
 * nsUnicodeDecoderHelper.cpp.
 *
 * Returns 0 when fewer than two bytes are available (which will lead to
 * NS_OK_UDEC_MOREINPUT).  Otherwise consumes two bytes and returns 1; *out
 * is the byte pair masked with 0x7F7F, or 0xFF when the second byte is out
 * of range (for a 2-byte table, uMap() is guaranteed to fail for 0xFF).
 * state is not used.
 */
int uCheckAndScanAlways2ByteShiftGR(int32_t *state,
                                    unsigned char *in,
                                    uint16_t *out,
                                    uint32_t inbuflen,
                                    uint32_t *inscanlen)
{
    if (inbuflen < 2)
        return 0;

    /* Two bytes are consumed whether or not the trail byte is valid. */
    *inscanlen = 2;
    if (CHK_GR94(in[1]))
        *out = (((in[0] << 8) | (in[1])) & 0x7F7F);
    else
        *out = 0xFF;
    return 1;
}
|
330 /*================================================================================= |
|
331 |
|
332 =================================================================================*/ |
|
/*
 * Two-byte scanner used by the CP949 -> Unicode converter.
 *
 * The first byte should be in [0xa1,0xfe] and the second byte in
 * [0x41,0xfe].  Only the second byte is checked here because the first
 * byte is checked before this function is called, in
 * nsUnicodeDecoderHelper.cpp.
 *
 * Returns 0 when fewer than two bytes are available (which will lead to
 * NS_OK_UDEC_MOREINPUT).  Otherwise consumes two bytes and returns 1; *out
 * is (in[0] << 8) | in[1], or 0xFF when the second byte is outside
 * [0x41,0xfe] (for a 2-byte table, uMap() is guaranteed to fail for 0xFF).
 * state is not used.
 */
int uCheckAndScanAlways2ByteGR128(int32_t *state,
                                  unsigned char *in,
                                  uint16_t *out,
                                  uint32_t inbuflen,
                                  uint32_t *inscanlen)
{
    if (inbuflen < 2)
        return 0;

    /* Two bytes are consumed whether or not the trail byte is valid. */
    *inscanlen = 2;

    /*
     * Enforce both ends of the documented trail-byte range; previously
     * only the lower bound was tested, so 0xff slipped through.
     */
    if ((in[1] < 0x41) || (in[1] > 0xfe))
        *out = 0xFF;
    else
        *out = (uint16_t) ((in[0] << 8) | in[1]);
    return 1;
}
|
363 /*================================================================================= |
|
364 |
|
365 =================================================================================*/ |
|
366 int uScanShift( |
|
367 uShiftInTable *shift, |
|
368 int32_t* state, |
|
369 unsigned char *in, |
|
370 uint16_t *out, |
|
371 uint32_t inbuflen, |
|
372 uint32_t* inscanlen |
|
373 ) |
|
374 { |
|
375 int16_t i; |
|
376 const uShiftInCell* cell = &(shift->shiftcell[0]); |
|
377 int16_t itemnum = shift->numOfItem; |
|
378 for(i=0;i<itemnum;i++) |
|
379 { |
|
380 if( ( in[0] >= cell[i].shiftin_Min) && |
|
381 ( in[0] <= cell[i].shiftin_Max)) |
|
382 { |
|
383 if(inbuflen < cell[i].reserveLen) |
|
384 return 0; |
|
385 else |
|
386 { |
|
387 *inscanlen = cell[i].reserveLen; |
|
388 return (uSubScanner(cell[i].classID,in,out)); |
|
389 } |
|
390 } |
|
391 } |
|
392 return 0; |
|
393 } |
|
394 /*================================================================================= |
|
395 |
|
396 =================================================================================*/ |
|
/*
 * Scanner for three-byte sequences of the form 0x8F b1 b2.
 *
 * Returns 0 when fewer than three bytes are available or in[0] is not 0x8F.
 * Otherwise returns 1 with:
 *   - *inscanlen = 2, *out = 0xFF            when in[1] fails CHK_GR94
 *   - *inscanlen = 3, *out = 0xFF            when in[2] fails CHK_GR94
 *   - *inscanlen = 3, *out = (b1,b2)&0x7F7F  when both bytes pass
 * For a 2-byte table, uMap() is guaranteed to fail for 0xFF.
 * state is not used.
 */
int uCheckAndScan2ByteGRPrefix8F(int32_t *state,
                                 unsigned char *in,
                                 uint16_t *out,
                                 uint32_t inbuflen,
                                 uint32_t *inscanlen)
{
    if ((inbuflen < 3) || (in[0] != 0x8F))
        return 0;

    if (!CHK_GR94(in[1]))   /* 2nd byte range check */
    {
        *inscanlen = 2;
        *out = 0xFF;
        return 1;
    }

    /* From here on all three bytes are consumed. */
    *inscanlen = 3;
    if (CHK_GR94(in[2]))    /* 3rd byte range check */
        *out = (((in[1] << 8) | (in[2])) & 0x7F7F);
    else
        *out = 0xFF;
    return 1;
}
|
426 /*================================================================================= |
|
427 |
|
428 =================================================================================*/ |
|
429 |
|
/*
 * Function body shared by uCheckAndScan2ByteGRPrefix8EAX(), where X is
 * 2,3,4,5,6,7.  It expands to a sequence of statements that refer to the
 * enclosing function's in, out, inbuflen and inscanlen parameters and
 * returns on every path:
 *   - 0 when fewer than four bytes are available or in[0] is not 0x8E
 *   - 1 with *inscanlen = 2, *out = 0xFF when in[1] is not PREFIX
 *   - 1 with *inscanlen = 3, *out = 0xFF when in[2] fails CHK_GR94
 *   - 1 with *inscanlen = 4, *out = 0xFF when in[3] fails CHK_GR94
 *   - 1 with *inscanlen = 4, *out = ((in[2] << 8) | in[3]) & 0x7F7F otherwise
 */
#define CNS_8EAX_4BYTE(PREFIX)                          \
    if ((inbuflen < 4) || (in[0] != 0x8E))              \
        return 0;                                       \
    if (in[1] != (PREFIX))                              \
    {                                                   \
        *inscanlen = 2;                                 \
        *out = 0xFF;                                    \
        return 1;                                       \
    }                                                   \
    if (!CHK_GR94(in[2]))                               \
    {                                                   \
        *inscanlen = 3;                                 \
        *out = 0xFF;                                    \
        return 1;                                       \
    }                                                   \
    *inscanlen = 4;                                     \
    if (CHK_GR94(in[3]))                                \
        *out = (((in[2] << 8) | (in[3])) & 0x7F7F);     \
    else                                                \
        *out = 0xFF;                                    \
    return 1;
|
460 |
|
461 int uCheckAndScan2ByteGRPrefix8EA2( |
|
462 int32_t* state, |
|
463 unsigned char *in, |
|
464 uint16_t *out, |
|
465 uint32_t inbuflen, |
|
466 uint32_t* inscanlen |
|
467 ) |
|
468 { |
|
469 CNS_8EAX_4BYTE(0xA2) |
|
470 } |
|
471 |
|
472 /*================================================================================= |
|
473 |
|
474 =================================================================================*/ |
|
475 int uCheckAndScan2ByteGRPrefix8EA3( |
|
476 int32_t* state, |
|
477 unsigned char *in, |
|
478 uint16_t *out, |
|
479 uint32_t inbuflen, |
|
480 uint32_t* inscanlen |
|
481 ) |
|
482 { |
|
483 CNS_8EAX_4BYTE(0xA3) |
|
484 } |
|
485 /*================================================================================= |
|
486 |
|
487 =================================================================================*/ |
|
488 int uCheckAndScan2ByteGRPrefix8EA4( |
|
489 int32_t* state, |
|
490 unsigned char *in, |
|
491 uint16_t *out, |
|
492 uint32_t inbuflen, |
|
493 uint32_t* inscanlen |
|
494 ) |
|
495 { |
|
496 CNS_8EAX_4BYTE(0xA4) |
|
497 } |
|
498 /*================================================================================= |
|
499 |
|
500 =================================================================================*/ |
|
501 int uCheckAndScan2ByteGRPrefix8EA5( |
|
502 int32_t* state, |
|
503 unsigned char *in, |
|
504 uint16_t *out, |
|
505 uint32_t inbuflen, |
|
506 uint32_t* inscanlen |
|
507 ) |
|
508 { |
|
509 CNS_8EAX_4BYTE(0xA5) |
|
510 } |
|
511 /*================================================================================= |
|
512 |
|
513 =================================================================================*/ |
|
514 int uCheckAndScan2ByteGRPrefix8EA6( |
|
515 int32_t* state, |
|
516 unsigned char *in, |
|
517 uint16_t *out, |
|
518 uint32_t inbuflen, |
|
519 uint32_t* inscanlen |
|
520 ) |
|
521 { |
|
522 CNS_8EAX_4BYTE(0xA6) |
|
523 } |
|
524 /*================================================================================= |
|
525 |
|
526 =================================================================================*/ |
|
527 int uCheckAndScan2ByteGRPrefix8EA7( |
|
528 int32_t* state, |
|
529 unsigned char *in, |
|
530 uint16_t *out, |
|
531 uint32_t inbuflen, |
|
532 uint32_t* inscanlen |
|
533 ) |
|
534 { |
|
535 CNS_8EAX_4BYTE(0xA7) |
|
536 } |
|
537 /*================================================================================= |
|
538 |
|
539 =================================================================================*/ |
|
/*
 * Constants for the Hangul syllable composition formula used below
 * (Unicode 2.0 page 3-13 item 5).
 */
#define SBase 0xAC00              /* added to the computed syllable index */
#define SCount 11172
#define LCount 19
#define VCount 21                 /* multiplier applied to the leading-consonant index */
#define TCount 28                 /* multiplier applied to the (leading, vowel) index */
#define NCount (VCount * TCount)

/*
 * Scanner for a Hangul syllable spelled out as eight bytes:
 *
 *     a4 d4  a4 LL  a4 VV  a4 TT
 *
 * LL selects the leading consonant (0xa1..0xbe, through a lookup table),
 * VV the medial vowel (0xbf..0xd3) and TT the trailing consonant (0xd4
 * meaning "none", otherwise 0xa1..0xbe through a second lookup table).
 *
 * Returns 0, leaving *out and *inscanlen untouched, when fewer than eight
 * bytes are available, when the fixed bytes do not match, or when any of
 * the three components is illegal.  Otherwise stores the composed value in
 * *out, sets *inscanlen to 8 and returns 1.  state is not used.
 */
int uCnSAlways8BytesDecomposedHangul(int32_t *state,
                                     unsigned char *in,
                                     uint16_t *out,
                                     uint32_t inbuflen,
                                     uint32_t *inscanlen)
{
    /* Leading-consonant index for bytes 0xa1..0xbe; 0xff marks an illegal byte. */
    static const uint8_t leadMap[] = {
        /*   A1   A2   A3   A4   A5   A6   A7      */
              0,   1,0xff,   2,0xff,0xff,   3,
        /*   A8   A9   AA   AB   AC   AD   AE   AF */
              4,   5,0xff,0xff,0xff,0xff,0xff,0xff,
        /*   B0   B1   B2   B3   B4   B5   B6   B7 */
           0xff,   6,   7,   8,0xff,   9,  10,  11,
        /*   B8   B9   BA   BB   BC   BD   BE      */
             12,  13,  14,  15,  16,  17,  18
    };
    /* Trailing-consonant index for bytes 0xa1..0xbe; 0xff marks an illegal byte. */
    static const uint8_t trailMap[] = {
        /*   A1   A2   A3   A4   A5   A6   A7      */
              1,   2,   3,   4,   5,   6,   7,
        /*   A8   A9   AA   AB   AC   AD   AE   AF */
           0xff,   8,   9,  10,  11,  12,  13,  14,
        /*   B0   B1   B2   B3   B4   B5   B6   B7 */
             15,  16,  17,0xff,  18,  19,  20,  21,
        /*   B8   B9   BA   BB   BC   BD   BE      */
             22,0xff,  23,  24,  25,  26,  27
    };
    uint16_t lead;
    uint16_t vowel;
    uint16_t trail;

    /* Need all eight bytes before any of them is inspected. */
    if (inbuflen < 8)
        return 0;

    /* Fixed framing: a4 d4 first, then a4 in front of each component. */
    if ((in[0] != 0xa4) || (in[1] != 0xd4) ||
        (in[2] != 0xa4) || (in[4] != 0xa4) || (in[6] != 0xa4))
        return 0;

    /* Leading consonant. */
    if ((in[3] < 0xa1) || (in[3] > 0xbe))
        return 0;
    lead = leadMap[in[3] - 0xa1];
    if (lead == 0xff)
        return 0;

    /* Medial vowel: the byte values map linearly onto 0..20. */
    if ((in[5] < 0xbf) || (in[5] > 0xd3))
        return 0;
    vowel = in[5] - 0xbf;

    /* Trailing consonant: 0xd4 stands for "no trailing consonant". */
    if (in[7] == 0xd4)
    {
        trail = 0;
    }
    else
    {
        if ((in[7] < 0xa1) || (in[7] > 0xbe))
            return 0;
        trail = trailMap[in[7] - 0xa1];
        if (trail == 0xff)
            return 0;
    }

    *inscanlen = 8;
    /* Composition formula from Unicode 2.0 page 3-13 item 5. */
    *out = (lead * VCount + vowel) * TCount + trail + SBase;
    return 1;
}
|
621 /*================================================================================= |
|
622 |
|
623 =================================================================================*/ |
|
624 |
|
625 int uCheckAndScanJohabHangul( |
|
626 int32_t* state, |
|
627 unsigned char *in, |
|
628 uint16_t *out, |
|
629 uint32_t inbuflen, |
|
630 uint32_t* inscanlen |
|
631 ) |
|
632 { |
|
633 /* since we don't have code to convert Johab to Unicode right now * |
|
634 * make this part of code #if 0 to save space until we fully test it */ |
|
635 if(inbuflen < 2) |
|
636 return 0; |
|
637 else { |
|
638 /* |
|
639 * See Table 4-45 Johab Encoding's Five-Bit Binary Patterns in page 183 |
|
640 * of "CJKV Information Processing" for details |
|
641 */ |
|
642 static const uint8_t lMap[32]={ /* totaly 19 */ |
|
643 0xff,0xff,0, 1, 2, 3, 4, 5, /* 0-7 */ |
|
644 6, 7, 8, 9, 10, 11, 12, 13, /* 8-15 */ |
|
645 14, 15, 16, 17, 18, 0xff,0xff,0xff, /* 16-23 */ |
|
646 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff /* 24-31 */ |
|
647 }; |
|
648 static const uint8_t vMap[32]={ /* totaly 21 */ |
|
649 0xff,0xff,0xff,0, 1, 2, 3, 4, /* 0-7 */ |
|
650 0xff,0xff,5, 6, 7, 8, 9, 10, /* 8-15 */ |
|
651 0xff,0xff,11, 12, 13, 14, 15, 16, /* 16-23 */ |
|
652 0xff,0xff,17, 18, 19, 20, 0xff,0xff /* 24-31 */ |
|
653 }; |
|
654 static const uint8_t tMap[32]={ /* totaly 29 */ |
|
655 0xff,0, 1, 2, 3, 4, 5, 6, /* 0-7 */ |
|
656 7, 8, 9, 10, 11, 12, 13, 14, /* 8-15 */ |
|
657 15, 16, 0xff,17, 18, 19, 20, 21, /* 16-23 */ |
|
658 22, 23, 24, 25, 26, 27, 0xff,0xff /* 24-31 */ |
|
659 }; |
|
660 uint16_t ch = (in[0] << 8) | in[1]; |
|
661 uint16_t LIndex, VIndex, TIndex; |
|
662 if(0 == (0x8000 & ch)) |
|
663 return 0; |
|
664 LIndex=lMap[(ch>>10)& 0x1F]; |
|
665 VIndex=vMap[(ch>>5) & 0x1F]; |
|
666 TIndex=tMap[(ch>>0) & 0x1F]; |
|
667 if((0xff==(LIndex)) || |
|
668 (0xff==(VIndex)) || |
|
669 (0xff==(TIndex))) |
|
670 return 0; |
|
671 /* the following line is from Unicode 2.0 page 3-13 item 5 */ |
|
672 *out = ( LIndex * VCount + VIndex) * TCount + TIndex + SBase; |
|
673 *inscanlen = 2; |
|
674 return 1; |
|
675 } |
|
676 } |
|
/*
 * Scanner for a two-byte Johab symbol code: converts the (hi, lo) pair into
 * a two-byte row/column value stored as (row << 8) | column in *out.
 *
 * Returns 0, leaving *out and *inscanlen untouched, when fewer than two
 * bytes are available; otherwise sets *inscanlen to 2 and returns 1.
 * No range check is made on either byte.  state is not used.
 *
 * All arithmetic is done in unsigned 32-bit so that lead bytes below the
 * subtracted base (200 or 187) wrap around instead of left-shifting a
 * negative int, which is undefined behaviour in C.  For every input where
 * the signed computation was well defined the result is unchanged.
 *
 * The computation is based on the Perl code listed under
 * "Johab to ISO-2022-KR or EUC-KR Conversion" on page 1014 of
 * "CJKV Information Processing" by Ken Lunde <lunde@adobe.com>:
 *
 *   sub johab2ks ($) { # Convert Johab to ISO-2022-KR
 *     my @johab = unpack("C*", $_[0]);
 *     my ($offset, $d8_off) = (0,0);
 *     my @out = ();
 *     while(($hi, $lo) = splice($johab, 0, 2)) {
 *       $offset = 1 if ($hi > 223 and $hi < 250);
 *       $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
 *       push (@out, (((($hi - ($hi < 223 ? 200 : 187)) << 1) -
 *             ($lo < 161 ? 1 : 0) + $offset) + $d8_off),
 *             $lo - ($lo < 161 ? ($lo > 126 ? 34 : 16) : 128 ));
 *     }
 *     return pack ("C*", @out);
 *   }
 *
 * Additional comments from Ken Lunde:
 *   $d8_off = ($hi == 216 and ($lo > 160 ? 94 : 42));
 * has three possible return values:
 *    0 if $hi is not equal to 216
 *   94 if $hi is equal to 216 and $lo is greater than 160
 *   42 if $hi is equal to 216 and $lo is not greater than 160
 */
int uCheckAndScanJohabSymbol(int32_t *state,
                             unsigned char *in,
                             uint16_t *out,
                             uint32_t inbuflen,
                             uint32_t *inscanlen)
{
    uint32_t hi;
    uint32_t lo;
    uint32_t row;
    uint32_t col;

    if (inbuflen < 2)
        return 0;

    hi = in[0];
    lo = in[1];

    /* Row: (hi - base) * 2, then the three corrections from the reference. */
    row = (hi - ((hi < 223) ? 200u : 187u)) << 1;
    if (lo < 161)
        row -= 1u;
    if ((hi > 223) && (hi < 250))
        row += 1u;                       /* $offset */
    if (hi == 216)
        row += (lo > 160) ? 94u : 42u;   /* $d8_off */

    /* Column: lo minus 16, 34 or 128 depending on which range lo is in. */
    col = lo - ((lo < 161) ? ((lo > 126) ? 34u : 16u) : 128u);

    *out = (uint16_t) ((row << 8) | col);
    *inscanlen = 2;
    return 1;
}
|
/*
 * Scanner for four-byte sequences of the form
 *
 *     [0x81..0xfe] [0x30..0x39] [0x81..0xfe] [0x30..0x39]
 *
 * The four bytes are folded into one linear index (radices 10, 126 and 10
 * from the second byte onwards).  Returns 0, leaving *out and *inscanlen
 * untouched, when fewer than four bytes are available or any byte is out
 * of its range.  Otherwise sets *inscanlen to 4, stores the index in *out
 * when it is below 0x10000 and 0xFFFD when it is not, and returns 1.
 * state is not used.
 */
int uCheckAndScan4BytesGB18030(int32_t *state,
                               unsigned char *in,
                               uint16_t *out,
                               uint32_t inbuflen,
                               uint32_t *inscanlen)
{
    uint32_t linear;

    if (inbuflen < 4)
        return 0;

    /* Bytes 1 and 3 are lead-style bytes, bytes 2 and 4 are ASCII digits. */
    if ((in[0] < 0x81) || (in[0] > 0xfe))
        return 0;
    if ((in[1] < 0x30) || (in[1] > 0x39))
        return 0;
    if ((in[2] < 0x81) || (in[2] > 0xfe))
        return 0;
    if ((in[3] < 0x30) || (in[3] > 0x39))
        return 0;

    /* Accumulate the mixed-radix value one byte at a time. */
    linear = (uint32_t) (in[0] - 0x81);
    linear = linear * 10 + (uint32_t) (in[1] - 0x30);
    linear = linear * 126 + (uint32_t) (in[2] - 0x81);
    linear = linear * 10 + (uint32_t) (in[3] - 0x30);

    *inscanlen = 4;
    if (linear < 0x00010000)
        *out = (uint16_t) linear;
    else
        *out = 0xFFFD;   /* index does not fit in 16 bits */
    return 1;
}