intl/uconv/ucvcn/nsISO2022CNToUnicode.cpp

branch
TOR_BUG_3246
changeset 4
fc2d59ddac77
equal deleted inserted replaced
-1:000000000000 0:5a903556db24
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsISO2022CNToUnicode.h"
6 #include "nsUCSupport.h"
7 #include "nsICharsetConverterManager.h"
8 #include "nsServiceManagerUtils.h"
9
// File-local class ID constant; both delegate-decoder helpers in this file
// pass it to do_GetService() to obtain the nsICharsetConverterManager.
10 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
11
12 NS_IMETHODIMP nsISO2022CNToUnicode::GB2312_To_Unicode(unsigned char *aSrc, int32_t aSrcLength, char16_t * aDest, int32_t * aDestLength)
13 {
14 nsresult rv;
15
16 if(!mGB2312_Decoder) {
17 // creating a delegate converter (GB2312)
18 nsCOMPtr<nsICharsetConverterManager> ccm =
19 do_GetService(kCharsetConverterManagerCID, &rv);
20 if(NS_FAILED(rv))
21 return NS_ERROR_UNEXPECTED;
22
23 rv = ccm->GetUnicodeDecoderRaw("GB2312", getter_AddRefs(mGB2312_Decoder));
24 if(NS_FAILED(rv))
25 return NS_ERROR_UNEXPECTED;
26 }
27
28 if(!mGB2312_Decoder) // failed creating a delegate converter
29 return NS_ERROR_UNEXPECTED;
30
31 rv = mGB2312_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
32 return rv;
33 }
34
35 NS_IMETHODIMP nsISO2022CNToUnicode::EUCTW_To_Unicode(unsigned char *aSrc, int32_t aSrcLength, char16_t * aDest, int32_t * aDestLength)
36 {
37 nsresult rv;
38
39 if(!mEUCTW_Decoder) {
40 // creating a delegate converter (x-euc-tw)
41 nsCOMPtr<nsICharsetConverterManager> ccm =
42 do_GetService(kCharsetConverterManagerCID, &rv);
43 if(NS_FAILED(rv))
44 return NS_ERROR_UNEXPECTED;
45
46 rv = ccm->GetUnicodeDecoderRaw("x-euc-tw", getter_AddRefs(mEUCTW_Decoder));
47 if(NS_FAILED(rv))
48 return NS_ERROR_UNEXPECTED;
49 }
50
51 if(!mEUCTW_Decoder) // failed creating a delegate converter
52 return NS_ERROR_UNEXPECTED;
53
54 rv = mEUCTW_Decoder->Convert((const char *)aSrc, &aSrcLength, aDest, aDestLength);
55 return(rv);
56 }
57
/**
 * Decodes ISO-2022-CN input into UTF-16 code units with a byte-at-a-time
 * state machine.
 *
 * The machine state is kept in members (mState, plus mData for a pending
 * first byte, mRunLength for the number of double-byte conversions attempted
 * since the last shift, and mPlaneID for the plane picked by "ESC $ + <I..M>").
 * mState is not reset on entry, so a call continues from wherever the previous
 * call stopped.
 *
 * Recurring pattern: when an escape sequence turns out not to be one this
 * decoder recognises, the bytes already swallowed are written out literally,
 * followed by the current byte, and the machine drops back to eState_ASCII.
 * Any byte with the high bit set that is written literally is replaced by
 * U+FFFD.
 *
 * @param aSrc     input bytes.
 * @param aSrcLen  in: number of bytes at aSrc. out: on the two early-exit
 *                 paths (error1 / error2) it is overwritten with the number of
 *                 bytes consumed; on the normal exit it is left untouched.
 * @param aDest    output buffer.
 * @param aDestLen in: capacity of aDest in char16_t units. out: number of
 *                 units written.
 * @return NS_OK once every input byte has been consumed;
 *         NS_OK_UDEC_MOREOUTPUT when the output buffer is too small for the
 *         next write; NS_ERROR_UNEXPECTED when a delegate decoder fails or
 *         mState is eState_ERROR.
 */
58 NS_IMETHODIMP nsISO2022CNToUnicode::Convert(const char * aSrc, int32_t * aSrcLen, char16_t * aDest, int32_t * aDestLen)
59 {
60 const unsigned char * srcEnd = (unsigned char *)aSrc + *aSrcLen;
61 const unsigned char * src = (unsigned char *) aSrc;
62 char16_t* destEnd = aDest + *aDestLen;
63 char16_t* dest = aDest;
64 nsresult rv;
// Scratch in/out length for the delegate decoders: remaining capacity going
// in, and it is added to dest after a successful call.
65 int32_t aLen;
66
// One input byte per iteration. src is advanced only at the bottom of the
// loop, so every "goto error1/error2" leaves src on the byte being handled.
67 while ((src < srcEnd))
68 {
69 switch (mState)
70 {
71 case eState_ASCII:
72 if(ESC == *src) {
73 mState = eState_ESC;
74 } else {
// CHECK_OVERRUN is defined elsewhere; presumably true when fewer than the
// requested number of output units remain -- TODO confirm.
75 if (CHECK_OVERRUN(dest, destEnd, 1))
76 goto error1;
// Bytes with the high bit set are not valid here and become U+FFFD.
77 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
78
79 mState = eState_ASCII;
80 }
81 break;
82
83 case eState_ESC: // ESC
84 if('$' == *src) {
85 mState = eState_ESC_24;
86 } else {
// Not a recognised sequence: replay the swallowed ESC plus this byte.
87 if (CHECK_OVERRUN(dest, destEnd, 2))
88 goto error1;
89 *dest++ = (char16_t) ESC;
90 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
91
92 mState = eState_ASCII;
93 }
94 break;
95
96 case eState_ESC_24: // ESC $
97 if(')' == *src) {
98 mState = eState_ESC_24_29;
99 } else if('*' == *src) {
100 mState = eState_ESC_24_2A;
101 } else if('+' == *src) {
102 mState = eState_ESC_24_2B;
103 } else {
104 if (CHECK_OVERRUN(dest, destEnd, 3))
105 goto error1;
106 *dest++ = (char16_t) ESC;
107 *dest++ = (char16_t) '$';
108 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
109
110 mState = eState_ASCII;
111 }
112 break;
113
114 case eState_ESC_24_29: // ESC $ )
// 'A' leads to the GB2312 states, 'G' to the CNS 11643 plane 1 states.
115 if('A' == *src) {
116 mState = eState_ESC_24_29_A;
117 } else if('G' == *src) {
118 mState = eState_ESC_24_29_G;
119 } else {
120 if (CHECK_OVERRUN(dest, destEnd, 4))
121 goto error1;
122 *dest++ = (char16_t) ESC;
123 *dest++ = (char16_t) '$';
124 *dest++ = (char16_t) ')';
125 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
126
127 mState = eState_ASCII;
128 }
129 break;
130
131 case eState_ESC_24_29_A: // ESC $ ) A
// Only an immediately following SO enters double-byte mode; anything else
// replays the whole sequence literally.
132 if(SO == *src) {
133 mState = eState_GB2312_1980;
134 mRunLength = 0;
135 } else {
136 if (CHECK_OVERRUN(dest, destEnd, 5))
137 goto error1;
138 *dest++ = (char16_t) ESC;
139 *dest++ = (char16_t) '$';
140 *dest++ = (char16_t) ')';
141 *dest++ = (char16_t) 'A';
142 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
143
144 mState = eState_ASCII;
145 }
146 break;
147
148 case eState_GB2312_1980: // ESC $ ) A SO
149 if(SI == *src) { // Shift-In (SI)
150 mState = eState_ESC_24_29_A_SO_SI;
// A shifted run in which no double-byte conversion was attempted produces a
// single U+FFFD instead of nothing.
151 if (mRunLength == 0) {
152 if (CHECK_OVERRUN(dest, destEnd, 1))
153 goto error1;
154 *dest++ = 0xFFFD;
155 }
156 mRunLength = 0;
157 } else if(ESC == *src) {
158 mState = eState_ESC;
159 } else {
// 0x21..0x7E is accepted as the first byte of a pair and parked in mData;
// every other byte is written out as a single unit.
160 if(0x20 < *src && *src < 0x7f) {
161 mData = *src;
162 mState = eState_GB2312_1980_2ndbyte;
163 } else {
164 if (CHECK_OVERRUN(dest, destEnd, 1))
165 goto error1;
166 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
167 }
168 }
169 break;
170
171 case eState_GB2312_1980_2ndbyte: // ESC $ ) A SO
172 if(0x20 < *src && *src < 0x7f) {
173 unsigned char gb[2];
174 int32_t gbLen = 2;
175
// The delegate is fed the pair with the high bit set on both bytes.
176 gb[0] = mData | 0x80;
177 gb[1] = *src | 0x80;
178
179 aLen = destEnd - dest;
180 rv = GB2312_To_Unicode(gb, gbLen, dest, &aLen);
// Counted before the result is inspected, so a failed or retried attempt
// still counts towards the run length.
181 ++mRunLength;
// Both exits below skip the state reset after this if/else, so mState stays
// on this 2ndbyte state for error1 (error2 resets it to eState_ASCII).
182 if(rv == NS_OK_UDEC_MOREOUTPUT) {
183 goto error1;
184 } else if(NS_FAILED(rv)) {
185 goto error2;
186 }
187
188 dest += aLen;
189 } else {
// Invalid second byte: write the parked first byte and this byte literally.
190 if (CHECK_OVERRUN(dest, destEnd, 2))
191 goto error1;
192 *dest++ = (char16_t) mData;
193 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
194 }
195 mState = eState_GB2312_1980;
196 break;
197
198 case eState_ESC_24_29_A_SO_SI: // ESC $ ) A SO SI
// Single-byte mode after a shift-in: a bare SO re-enters GB2312 mode without
// a new escape sequence.
199 if(SO == *src) {
200 mState = eState_GB2312_1980;
201 mRunLength = 0;
202 } else if(ESC == *src) {
203 mState = eState_ESC;
204 } else {
205 if (CHECK_OVERRUN(dest, destEnd, 1))
206 goto error1;
207 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
208
209 mState = eState_ESC_24_29_A_SO_SI;
210 }
211 break;
212
213 case eState_ESC_24_29_G: // ESC $ ) G
214 if(SO == *src) {
215 mState = eState_CNS11643_1;
216 mRunLength = 0;
217 } else {
218 if (CHECK_OVERRUN(dest, destEnd, 5))
219 goto error1;
220 *dest++ = (char16_t) ESC;
221 *dest++ = (char16_t) '$';
222 *dest++ = (char16_t) ')';
223 *dest++ = (char16_t) 'G';
224 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
225
226 mState = eState_ASCII;
227 }
228 break;
229
230 case eState_CNS11643_1: // ESC $ ) G SO
// Same structure as eState_GB2312_1980, with the x-euc-tw delegate.
231 if(SI == *src) { // Shift-In (SI)
232 mState = eState_ESC_24_29_G_SO_SI;
233 if (mRunLength == 0) {
234 if (CHECK_OVERRUN(dest, destEnd, 1))
235 goto error1;
236 *dest++ = 0xFFFD;
237 }
238 mRunLength = 0;
239 } else if(ESC == *src) {
240 mState = eState_ESC;
241 } else {
242 if(0x20 < *src && *src < 0x7f) {
243 mData = *src;
244 mState = eState_CNS11643_1_2ndbyte;
245 } else {
246 if (CHECK_OVERRUN(dest, destEnd, 1))
247 goto error1;
248 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
249 }
250 }
251 break;
252
253 case eState_CNS11643_1_2ndbyte: // ESC $ ) G SO
254 if(0x20 < *src && *src < 0x7f) {
// Only the first two elements of cns are filled and passed (cnsLen == 2).
255 unsigned char cns[4];
256 int32_t cnsLen = 2;
257
258 cns[0] = mData | 0x80;
259 cns[1] = *src | 0x80;
260
261 aLen = destEnd - dest;
262 rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
263 ++mRunLength;
264 if(rv == NS_OK_UDEC_MOREOUTPUT) {
265 goto error1;
266 } else if(NS_FAILED(rv)) {
267 goto error2;
268 }
269
270 dest += aLen;
271 } else {
272 if (CHECK_OVERRUN(dest, destEnd, 2))
273 goto error1;
274 *dest++ = (char16_t) mData;
275 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
276 }
277 mState = eState_CNS11643_1;
278 break;
279
280 case eState_ESC_24_29_G_SO_SI: // ESC $ ) G SO SI
281 if(SO == *src) {
282 mState = eState_CNS11643_1;
283 mRunLength = 0;
284 } else if(ESC == *src) {
285 mState = eState_ESC;
286 } else {
287 if (CHECK_OVERRUN(dest, destEnd, 1))
288 goto error1;
289 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
290
291 mState = eState_ESC_24_29_G_SO_SI;
292 }
293 break;
294
295 case eState_ESC_24_2A: // ESC $ *
296 if('H' == *src) {
297 mState = eState_ESC_24_2A_H;
298 } else {
299 if (CHECK_OVERRUN(dest, destEnd, 4))
300 goto error1;
301 *dest++ = (char16_t) ESC;
302 *dest++ = (char16_t) '$';
303 *dest++ = (char16_t) '*';
304 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
305
306 mState = eState_ASCII;
307 }
308 break;
309
310 case eState_ESC_24_2A_H: // ESC $ * H
// This branch is entered with "ESC <SS2>" rather than SO.
311 if(ESC == *src) {
312 mState = eState_ESC_24_2A_H_ESC;
313 } else {
314 if (CHECK_OVERRUN(dest, destEnd, 5))
315 goto error1;
316 *dest++ = (char16_t) ESC;
317 *dest++ = (char16_t) '$';
318 *dest++ = (char16_t) '*';
319 *dest++ = (char16_t) 'H';
320 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
321
322 mState = eState_ASCII;
323 }
324 break;
325
326 case eState_ESC_24_2A_H_ESC: // ESC $ * H ESC
327 if(SS2 == *src) {
328 mState = eState_CNS11643_2;
329 mRunLength = 0;
330 } else if('$' == *src) {
// "ESC $" begins a fresh escape sequence.
331 mState = eState_ESC_24;
332 } else {
333 if (CHECK_OVERRUN(dest, destEnd, 6))
334 goto error1;
335 *dest++ = (char16_t) ESC;
336 *dest++ = (char16_t) '$';
337 *dest++ = (char16_t) '*';
338 *dest++ = (char16_t) 'H';
339 *dest++ = (char16_t) ESC;
340 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
341
342 mState = eState_ASCII;
343 }
344 break;
345
346 case eState_CNS11643_2: // ESC $ * H ESC SS2
347 if(SI == *src) { // Shift-In (SI)
348 mState = eState_ESC_24_2A_H_ESC_SS2_SI;
349 if (mRunLength == 0) {
350 if (CHECK_OVERRUN(dest, destEnd, 1))
351 goto error1;
352 *dest++ = 0xFFFD;
353 }
354 mRunLength = 0;
355 } else if(ESC == *src) {
356 mState = eState_ESC_24_2A_H_ESC;
357 } else {
358 if(0x20 < *src && *src < 0x7f) {
359 mData = *src;
360 mState = eState_CNS11643_2_2ndbyte;
361 } else {
362 if (CHECK_OVERRUN(dest, destEnd, 1))
363 goto error1;
364 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
365 }
366 }
367 break;
368
369 case eState_CNS11643_2_2ndbyte: // ESC $ * H ESC SS2
370 if(0x20 < *src && *src < 0x7f) {
371 unsigned char cns[4];
372 int32_t cnsLen = 4;
373
// Four-byte form for the delegate: MBYTE, PMASK + 2, then the pair with the
// high bit set. MBYTE and PMASK are defined elsewhere; presumably the prefix
// selects plane 2 -- TODO confirm.
374 cns[0] = (unsigned char) MBYTE;
375 cns[1] = (unsigned char) (PMASK + 2);
376 cns[2] = mData | 0x80;
377 cns[3] = *src | 0x80;
378
379 aLen = destEnd - dest;
380 rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
381 ++mRunLength;
382 if(rv == NS_OK_UDEC_MOREOUTPUT) {
383 goto error1;
384 } else if(NS_FAILED(rv)) {
385 goto error2;
386 }
387
388 dest += aLen;
389 } else {
390 if (CHECK_OVERRUN(dest, destEnd, 2))
391 goto error1;
392 *dest++ = (char16_t) mData;
393 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
394 }
395 mState = eState_CNS11643_2;
396 break;
397
398 case eState_ESC_24_2A_H_ESC_SS2_SI: // ESC $ * H ESC SS2 SI
399 if(ESC == *src) {
400 mState = eState_ESC_24_2A_H_ESC_SS2_SI_ESC;
401 } else {
402 if (CHECK_OVERRUN(dest, destEnd, 1))
403 goto error1;
404 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
405
406 mState = eState_ESC_24_2A_H_ESC_SS2_SI;
407 }
408 break;
409
410 case eState_ESC_24_2A_H_ESC_SS2_SI_ESC: // ESC $ * H ESC SS2 SI ESC
411 if(SS2 == *src) {
412 mState = eState_CNS11643_2;
413 mRunLength = 0;
414 } else if('$' == *src) {
415 mState = eState_ESC_24;
416 } else {
// NOTE(review): unlike the other fallbacks, only the current byte is written
// here; the pending ESC is not replayed.
417 if (CHECK_OVERRUN(dest, destEnd, 1))
418 goto error1;
419 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
420
421 mState = eState_ESC_24_2A_H_ESC_SS2_SI;
422 }
423 break;
424
425 case eState_ESC_24_2B: // ESC $ +
426 if('I' <= *src && *src <= 'M') {
427 mState = eState_ESC_24_2B_I;
// 'I'..'M' map to plane IDs 3..7.
428 mPlaneID = *src - 'I' + 3;
429 } else {
430 if (CHECK_OVERRUN(dest, destEnd, 4))
431 goto error1;
432 *dest++ = (char16_t) ESC;
433 *dest++ = (char16_t) '$';
434 *dest++ = (char16_t) '+';
435 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
436
437 mState = eState_ASCII;
438 }
439 break;
440
441 case eState_ESC_24_2B_I: // ESC $ + I
442 if(ESC == *src) {
443 mState = eState_ESC_24_2B_I_ESC;
444 } else {
445 if (CHECK_OVERRUN(dest, destEnd, 5))
446 goto error1;
447 *dest++ = (char16_t) ESC;
448 *dest++ = (char16_t) '$';
449 *dest++ = (char16_t) '+';
// Rebuild the original final letter ('I'..'M') from the stored plane ID.
450 *dest++ = (char16_t) 'I' + mPlaneID - 3;
451 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
452
453 mState = eState_ASCII;
454 }
455 break;
456
457 case eState_ESC_24_2B_I_ESC: // ESC $ + I ESC
458 if(SS3 == *src) {
459 mState = eState_CNS11643_3;
460 mRunLength = 0;
461 } else if('$' == *src) {
462 mState = eState_ESC_24;
463 } else {
464 if (CHECK_OVERRUN(dest, destEnd, 6))
465 goto error1;
466 *dest++ = (char16_t) ESC;
467 *dest++ = (char16_t) '$';
468 *dest++ = (char16_t) '+';
469 *dest++ = (char16_t) 'I' + mPlaneID - 3;
470 *dest++ = (char16_t) ESC;
471 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
472
473 mState = eState_ASCII;
474 }
475 break;
476
477 case eState_CNS11643_3: // ESC $ + I ESC SS3
478 if(SI == *src) { // Shift-In (SI)
479 mState = eState_ESC_24_2B_I_ESC_SS3_SI;
480 if (mRunLength == 0) {
481 if (CHECK_OVERRUN(dest, destEnd, 1))
482 goto error1;
483 *dest++ = 0xFFFD;
484 }
485 mRunLength = 0;
486 } else if(ESC == *src) {
487 mState = eState_ESC_24_2B_I_ESC;
488 } else {
489 if(0x20 < *src && *src < 0x7f) {
490 mData = *src;
491 mState = eState_CNS11643_3_2ndbyte;
492 } else {
493 if (CHECK_OVERRUN(dest, destEnd, 1))
494 goto error1;
495 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
496 }
497 }
498
499 break;
500
501 case eState_CNS11643_3_2ndbyte: // ESC $ + I ESC SS3
502 if(0x20 < *src && *src < 0x7f) {
503 unsigned char cns[4];
504 int32_t cnsLen = 4;
505
// Same four-byte form as the SS2 case, with the second byte built from the
// plane ID stored when "ESC $ + <I..M>" was seen.
506 cns[0] = (unsigned char) MBYTE;
507 cns[1] = (unsigned char) (PMASK + mPlaneID);
508 cns[2] = mData | 0x80;
509 cns[3] = *src | 0x80;
510
511 aLen = destEnd - dest;
512 rv = EUCTW_To_Unicode(cns, cnsLen, dest, &aLen);
513 ++mRunLength;
514 if(rv == NS_OK_UDEC_MOREOUTPUT) {
515 goto error1;
516 } else if(NS_FAILED(rv)) {
517 goto error2;
518 }
519
520 dest += aLen;
521 } else {
522 if (CHECK_OVERRUN(dest, destEnd, 2))
523 goto error1;
524 *dest++ = (char16_t) mData;
525 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
526 }
527 mState = eState_CNS11643_3;
528 break;
529
530 case eState_ESC_24_2B_I_ESC_SS3_SI: // ESC $ + I ESC SS3 SI
531 if(ESC == *src) {
532 mState = eState_ESC_24_2B_I_ESC_SS3_SI_ESC;
533 } else {
534 if (CHECK_OVERRUN(dest, destEnd, 1))
535 goto error1;
536 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
537
538 mState = eState_ESC_24_2B_I_ESC_SS3_SI;
539 }
540 break;
541
542 case eState_ESC_24_2B_I_ESC_SS3_SI_ESC: // ESC $ + I ESC SS3 SI ESC
543 if(SS3 == *src) {
544 mState = eState_CNS11643_3;
545 mRunLength = 0;
546 } else if('$' == *src) {
547 mState = eState_ESC_24;
548 } else {
// NOTE(review): as in the SS2 counterpart, the pending ESC is not replayed.
549 if (CHECK_OVERRUN(dest, destEnd, 1))
550 goto error1;
551 *dest++ = (0x80 & *src) ? 0xFFFD : (char16_t) *src;
552
553 mState = eState_ESC_24_2B_I_ESC_SS3_SI;
554 }
555 break;
556
557 case eState_ERROR:
558 NS_NOTREACHED("unhandled case");
559 goto error2;
560
561 } // switch
562 src++;
563 }
564
// Normal exit: all input consumed. *aSrcLen is left as the caller set it.
565 *aDestLen = dest- aDest;
566 return NS_OK;
567
// Output buffer exhausted. Reports how much was consumed and written; mState
// is left as it was, and src still points at the byte that did not fit.
568 error1:
569 *aDestLen = dest-aDest;
570 *aSrcLen = src - (const unsigned char*)aSrc;
571 return NS_OK_UDEC_MOREOUTPUT;
572
// Hard failure (delegate decoder error or eState_ERROR): report progress and
// reset the machine to eState_ASCII.
573 error2:
574 *aSrcLen = src - (const unsigned char*)aSrc;
575 *aDestLen = dest-aDest;
576 mState = eState_ASCII;
577 return NS_ERROR_UNEXPECTED;
578 }

mercurial