intl/uconv/ucvja/nsJapaneseToUnicode.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include "nsJapaneseToUnicode.h"
michael@0 6
michael@0 7 #include "nsUCSupport.h"
michael@0 8
michael@0 9 #include "japanese.map"
michael@0 10
michael@0 11 #include "nsICharsetConverterManager.h"
michael@0 12 #include "nsServiceManagerUtils.h"
michael@0 13
michael@0 14 #include "mozilla/Assertions.h"
michael@0 15
michael@0 16 // HTML5 says to use Windows-31J instead of the real Shift_JIS for decoding
michael@0 17 #define SJIS_INDEX gCP932Index[0]
michael@0 18 #define JIS0208_INDEX gCP932Index[1]
michael@0 19
michael@0 20 #define JIS0212_INDEX gJIS0212Index
michael@0 21 #define SJIS_UNMAPPED 0x30fb
michael@0 22 #define UNICODE_REPLACEMENT_CHARACTER 0xfffd
michael@0 23 #define IN_GR_RANGE(b) \
michael@0 24 ((uint8_t(0xa1) <= uint8_t(b)) && (uint8_t(b) <= uint8_t(0xfe)))
michael@0 25
michael@0 26 NS_IMETHODIMP nsShiftJISToUnicode::Convert(
michael@0 27 const char * aSrc, int32_t * aSrcLen,
michael@0 28 char16_t * aDest, int32_t * aDestLen)
michael@0 29 {
michael@0 30 static const uint8_t sbIdx[256] =
michael@0 31 {
michael@0 32 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00 */
michael@0 33 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08 */
michael@0 34 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10 */
michael@0 35 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18 */
michael@0 36 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20 */
michael@0 37 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x28 */
michael@0 38 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x30 */
michael@0 39 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38 */
michael@0 40 0, 1, 2, 3, 4, 5, 6, 7, /* 0x40 */
michael@0 41 8, 9, 10, 11, 12, 13, 14, 15, /* 0x48 */
michael@0 42 16, 17, 18, 19, 20, 21, 22, 23, /* 0x50 */
michael@0 43 24, 25, 26, 27, 28, 29, 30, 31, /* 0x58 */
michael@0 44 32, 33, 34, 35, 36, 37, 38, 39, /* 0x60 */
michael@0 45 40, 41, 42, 43, 44, 45, 46, 47, /* 0x68 */
michael@0 46 48, 49, 50, 51, 52, 53, 54, 55, /* 0x70 */
michael@0 47 56, 57, 58, 59, 60, 61, 62, 0xFF, /* 0x78 */
michael@0 48 63, 64, 65, 66, 67, 68, 69, 70, /* 0x80 */
michael@0 49 71, 72, 73, 74, 75, 76, 77, 78, /* 0x88 */
michael@0 50 79, 80, 81, 82, 83, 84, 85, 86, /* 0x90 */
michael@0 51 87, 88, 89, 90, 91, 92, 93, 94, /* 0x98 */
michael@0 52 95, 96, 97, 98, 99, 100, 101, 102, /* 0xa0 */
michael@0 53 103, 104, 105, 106, 107, 108, 109, 110, /* 0xa8 */
michael@0 54 111, 112, 113, 114, 115, 116, 117, 118, /* 0xb0 */
michael@0 55 119, 120, 121, 122, 123, 124, 125, 126, /* 0xb8 */
michael@0 56 127, 128, 129, 130, 131, 132, 133, 134, /* 0xc0 */
michael@0 57 135, 136, 137, 138, 139, 140, 141, 142, /* 0xc8 */
michael@0 58 143, 144, 145, 146, 147, 148, 149, 150, /* 0xd0 */
michael@0 59 151, 152, 153, 154, 155, 156, 157, 158, /* 0xd8 */
michael@0 60 159, 160, 161, 162, 163, 164, 165, 166, /* 0xe0 */
michael@0 61 167, 168, 169, 170, 171, 172, 173, 174, /* 0xe8 */
michael@0 62 175, 176, 177, 178, 179, 180, 181, 182, /* 0xf0 */
michael@0 63 183, 184, 185, 186, 187, 0xFF, 0xFF, 0xFF, /* 0xf8 */
michael@0 64 };
michael@0 65
michael@0 66 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
michael@0 67 const unsigned char* src =(unsigned char*) aSrc;
michael@0 68 char16_t* destEnd = aDest + *aDestLen;
michael@0 69 char16_t* dest = aDest;
michael@0 70 while (src < srcEnd) {
michael@0 71 switch (mState) {
michael@0 72 case 0:
michael@0 73 if (*src <= 0x80) {
michael@0 74 // ASCII
michael@0 75 *dest++ = (char16_t) *src;
michael@0 76 if (dest >= destEnd) {
michael@0 77 goto error1;
michael@0 78 }
michael@0 79 } else {
michael@0 80 mData = SJIS_INDEX[*src & 0x7F];
michael@0 81 if (mData < 0xE000) {
michael@0 82 mState = 1; // two bytes
michael@0 83 } else if (mData < 0xF000) {
michael@0 84 mState = 2; // EUDC
michael@0 85 } else {
michael@0 86 *dest++ = mData; // JIS 0201
michael@0 87 if (dest >= destEnd) {
michael@0 88 goto error1;
michael@0 89 }
michael@0 90 }
michael@0 91 }
michael@0 92 break;
michael@0 93
michael@0 94 case 1: // Index to table
michael@0 95 {
michael@0 96 MOZ_ASSERT(mData < 0xE000);
michael@0 97 uint8_t off = sbIdx[*src];
michael@0 98
michael@0 99 // Error handling: in the case where the second octet is not in the
michael@0 100 // valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and
michael@0 101 // interpret it as the ASCII value. In the case where the second
michael@0 102 // octet is in the valid range but there is no mapping for the
michael@0 103 // 2-octet sequence, do not unconsume.
michael@0 104 if(0xFF == off) {
michael@0 105 src--;
michael@0 106 if (mErrBehavior == kOnError_Signal)
michael@0 107 goto error_invalidchar;
michael@0 108 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 109 } else {
michael@0 110 char16_t ch = gJapaneseMap[mData+off];
michael@0 111 if(ch == 0xfffd) {
michael@0 112 if (mErrBehavior == kOnError_Signal)
michael@0 113 goto error_invalidchar;
michael@0 114 ch = SJIS_UNMAPPED;
michael@0 115 }
michael@0 116 *dest++ = ch;
michael@0 117 }
michael@0 118 mState = 0;
michael@0 119 if(dest >= destEnd)
michael@0 120 goto error1;
michael@0 121 }
michael@0 122 break;
michael@0 123
michael@0 124 case 2: // EUDC
michael@0 125 {
michael@0 126 MOZ_ASSERT(0xE000 <= mData && mData < 0xF000);
michael@0 127 uint8_t off = sbIdx[*src];
michael@0 128
michael@0 129 // Error handling as in case 1
michael@0 130 if(0xFF == off) {
michael@0 131 src--;
michael@0 132 if (mErrBehavior == kOnError_Signal)
michael@0 133 goto error_invalidchar;
michael@0 134
michael@0 135 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 136 } else {
michael@0 137 *dest++ = mData + off;
michael@0 138 }
michael@0 139 mState = 0;
michael@0 140 if(dest >= destEnd)
michael@0 141 goto error1;
michael@0 142 }
michael@0 143 break;
michael@0 144
michael@0 145 }
michael@0 146 src++;
michael@0 147 }
michael@0 148 *aDestLen = dest - aDest;
michael@0 149 return NS_OK;
michael@0 150 error_invalidchar:
michael@0 151 *aDestLen = dest - aDest;
michael@0 152 *aSrcLen = src - (const unsigned char*)aSrc;
michael@0 153 return NS_ERROR_ILLEGAL_INPUT;
michael@0 154 error1:
michael@0 155 *aDestLen = dest - aDest;
michael@0 156 src++;
michael@0 157 if ((mState == 0) && (src == srcEnd)) {
michael@0 158 return NS_OK;
michael@0 159 }
michael@0 160 *aSrcLen = src - (const unsigned char*)aSrc;
michael@0 161 return NS_OK_UDEC_MOREOUTPUT;
michael@0 162 }
michael@0 163
michael@0 164 char16_t
michael@0 165 nsShiftJISToUnicode::GetCharacterForUnMapped()
michael@0 166 {
michael@0 167 return char16_t(SJIS_UNMAPPED);
michael@0 168 }
michael@0 169
michael@0 170 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
michael@0 171 const char * aSrc, int32_t * aSrcLen,
michael@0 172 char16_t * aDest, int32_t * aDestLen)
michael@0 173 {
michael@0 174 static const uint8_t sbIdx[256] =
michael@0 175 {
michael@0 176 /* 0x0X */
michael@0 177 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 178 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 179 /* 0x1X */
michael@0 180 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 181 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 182 /* 0x2X */
michael@0 183 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 184 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 185 /* 0x3X */
michael@0 186 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 187 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 188 /* 0x4X */
michael@0 189 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 190 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 191 /* 0x5X */
michael@0 192 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 193 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 194 /* 0x6X */
michael@0 195 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 196 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 197 /* 0x7X */
michael@0 198 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 199 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 200 /* 0x8X */
michael@0 201 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 202 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 203 /* 0x9X */
michael@0 204 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 205 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 206 /* 0xAX */
michael@0 207 0xFF, 0, 1, 2, 3, 4, 5, 6,
michael@0 208 7, 8 , 9, 10, 11, 12, 13, 14,
michael@0 209 /* 0xBX */
michael@0 210 15, 16, 17, 18, 19, 20, 21, 22,
michael@0 211 23, 24, 25, 26, 27, 28, 29, 30,
michael@0 212 /* 0xCX */
michael@0 213 31, 32, 33, 34, 35, 36, 37, 38,
michael@0 214 39, 40, 41, 42, 43, 44, 45, 46,
michael@0 215 /* 0xDX */
michael@0 216 47, 48, 49, 50, 51, 52, 53, 54,
michael@0 217 55, 56, 57, 58, 59, 60, 61, 62,
michael@0 218 /* 0xEX */
michael@0 219 63, 64, 65, 66, 67, 68, 69, 70,
michael@0 220 71, 72, 73, 74, 75, 76, 77, 78,
michael@0 221 /* 0xFX */
michael@0 222 79, 80, 81, 82, 83, 84, 85, 86,
michael@0 223 87, 88, 89, 90, 91, 92, 93, 0xFF,
michael@0 224 };
michael@0 225
michael@0 226 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
michael@0 227 const unsigned char* src =(unsigned char*) aSrc;
michael@0 228 char16_t* destEnd = aDest + *aDestLen;
michael@0 229 char16_t* dest = aDest;
michael@0 230 while((src < srcEnd))
michael@0 231 {
michael@0 232 switch(mState)
michael@0 233 {
michael@0 234 case 0:
michael@0 235 if(*src & 0x80 && *src != (unsigned char)0xa0)
michael@0 236 {
michael@0 237 mData = JIS0208_INDEX[*src & 0x7F];
michael@0 238 if(mData != 0xFFFD )
michael@0 239 {
michael@0 240 mState = 1; // two byte JIS0208
michael@0 241 } else {
michael@0 242 if( 0x8e == *src) {
michael@0 243 // JIS 0201
michael@0 244 mState = 2; // JIS0201
michael@0 245 } else if(0x8f == *src) {
michael@0 246 // JIS 0212
michael@0 247 mState = 3; // JIS0212
michael@0 248 } else {
michael@0 249 // others
michael@0 250 if (mErrBehavior == kOnError_Signal)
michael@0 251 goto error_invalidchar;
michael@0 252 *dest++ = 0xFFFD;
michael@0 253 if(dest >= destEnd)
michael@0 254 goto error1;
michael@0 255 }
michael@0 256 }
michael@0 257 } else {
michael@0 258 // ASCII
michael@0 259 *dest++ = (char16_t) *src;
michael@0 260 if(dest >= destEnd)
michael@0 261 goto error1;
michael@0 262 }
michael@0 263 break;
michael@0 264
michael@0 265 case 1: // Index to table
michael@0 266 {
michael@0 267 uint8_t off = sbIdx[*src];
michael@0 268 if(0xFF == off) {
michael@0 269 if (mErrBehavior == kOnError_Signal)
michael@0 270 goto error_invalidchar;
michael@0 271 *dest++ = 0xFFFD;
michael@0 272 // if the first byte is valid for EUC-JP but the second
michael@0 273 // is not while being a valid US-ASCII, save it
michael@0 274 // instead of eating it up !
michael@0 275 if ( (uint8_t)*src < (uint8_t)0x7f )
michael@0 276 --src;
michael@0 277 } else {
michael@0 278 *dest++ = gJapaneseMap[mData+off];
michael@0 279 }
michael@0 280 mState = 0;
michael@0 281 if(dest >= destEnd)
michael@0 282 goto error1;
michael@0 283 }
michael@0 284 break;
michael@0 285
michael@0 286 case 2: // JIS 0201
michael@0 287 {
michael@0 288 if((0xA1 <= *src) && (*src <= 0xDF)) {
michael@0 289 *dest++ = (0xFF61-0x00A1) + *src;
michael@0 290 } else {
michael@0 291 if (mErrBehavior == kOnError_Signal)
michael@0 292 goto error_invalidchar;
michael@0 293 *dest++ = 0xFFFD;
michael@0 294 // if 0x8e is not followed by a valid JIS X 0201 byte
michael@0 295 // but by a valid US-ASCII, save it instead of eating it up.
michael@0 296 if ( (uint8_t)*src < (uint8_t)0x7f )
michael@0 297 --src;
michael@0 298 }
michael@0 299 mState = 0;
michael@0 300 if(dest >= destEnd)
michael@0 301 goto error1;
michael@0 302 }
michael@0 303 break;
michael@0 304
michael@0 305 case 3: // JIS 0212
michael@0 306 {
michael@0 307 if (IN_GR_RANGE(*src))
michael@0 308 {
michael@0 309 mData = JIS0212_INDEX[*src & 0x7F];
michael@0 310 if(mData != 0xFFFD )
michael@0 311 {
michael@0 312 mState = 4;
michael@0 313 } else {
michael@0 314 mState = 5; // error
michael@0 315 }
michael@0 316 } else {
michael@0 317 // First "JIS 0212" byte is not in the valid GR range: save it
michael@0 318 if (mErrBehavior == kOnError_Signal)
michael@0 319 goto error_invalidchar;
michael@0 320 *dest++ = 0xFFFD;
michael@0 321 --src;
michael@0 322 mState = 0;
michael@0 323 if(dest >= destEnd)
michael@0 324 goto error1;
michael@0 325 }
michael@0 326 }
michael@0 327 break;
michael@0 328 case 4:
michael@0 329 {
michael@0 330 uint8_t off = sbIdx[*src];
michael@0 331 if(0xFF != off) {
michael@0 332 *dest++ = gJapaneseMap[mData+off];
michael@0 333 mState = 0;
michael@0 334 if(dest >= destEnd)
michael@0 335 goto error1;
michael@0 336 break;
michael@0 337 }
michael@0 338 // else fall through to error handler
michael@0 339 }
michael@0 340 case 5: // two bytes undefined
michael@0 341 {
michael@0 342 if (mErrBehavior == kOnError_Signal)
michael@0 343 goto error_invalidchar;
michael@0 344 *dest++ = 0xFFFD;
michael@0 345 // Undefined JIS 0212 two byte sequence. If the second byte is in
michael@0 346 // the valid range for a two byte sequence (0xa1 - 0xfe) consume
michael@0 347 // both bytes. Otherwise resynchronize on the second byte.
michael@0 348 if (!IN_GR_RANGE(*src))
michael@0 349 --src;
michael@0 350 mState = 0;
michael@0 351 if(dest >= destEnd)
michael@0 352 goto error1;
michael@0 353 }
michael@0 354 break;
michael@0 355 }
michael@0 356 src++;
michael@0 357 }
michael@0 358 *aDestLen = dest - aDest;
michael@0 359 return NS_OK;
michael@0 360 error_invalidchar:
michael@0 361 *aDestLen = dest - aDest;
michael@0 362 *aSrcLen = src - (const unsigned char*)aSrc;
michael@0 363 return NS_ERROR_ILLEGAL_INPUT;
michael@0 364 error1:
michael@0 365 *aDestLen = dest - aDest;
michael@0 366 src++;
michael@0 367 if ((mState == 0) && (src == srcEnd)) {
michael@0 368 return NS_OK;
michael@0 369 }
michael@0 370 *aSrcLen = src - (const unsigned char*)aSrc;
michael@0 371 return NS_OK_UDEC_MOREOUTPUT;
michael@0 372 }
michael@0 373
michael@0 374
michael@0 375
michael@0 376 NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
michael@0 377 const char * aSrc, int32_t * aSrcLen,
michael@0 378 char16_t * aDest, int32_t * aDestLen)
michael@0 379 {
michael@0 380 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
michael@0 381
michael@0 382 static const uint16_t fbIdx[128] =
michael@0 383 {
michael@0 384 /* 0x8X */
michael@0 385 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
michael@0 386 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
michael@0 387 /* 0x9X */
michael@0 388 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
michael@0 389 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
michael@0 390 /* 0xAX */
michael@0 391 0xFFFD, 0, 94, 94* 2, 94* 3, 94* 4, 94* 5, 94* 6,
michael@0 392 94* 7, 94* 8 , 94* 9, 94*10, 94*11, 94*12, 94*13, 94*14,
michael@0 393 /* 0xBX */
michael@0 394 94*15, 94*16, 94*17, 94*18, 94*19, 94*20, 94*21, 94*22,
michael@0 395 94*23, 94*24, 94*25, 94*26, 94*27, 94*28, 94*29, 94*30,
michael@0 396 /* 0xCX */
michael@0 397 94*31, 94*32, 94*33, 94*34, 94*35, 94*36, 94*37, 94*38,
michael@0 398 94*39, 94*40, 94*41, 94*42, 94*43, 94*44, 94*45, 94*46,
michael@0 399 /* 0xDX */
michael@0 400 94*47, 94*48, 94*49, 94*50, 94*51, 94*52, 94*53, 94*54,
michael@0 401 94*55, 94*56, 94*57, 94*58, 94*59, 94*60, 94*61, 94*62,
michael@0 402 /* 0xEX */
michael@0 403 94*63, 94*64, 94*65, 94*66, 94*67, 94*68, 94*69, 94*70,
michael@0 404 94*71, 94*72, 94*73, 94*74, 94*75, 94*76, 94*77, 94*78,
michael@0 405 /* 0xFX */
michael@0 406 94*79, 94*80, 94*81, 94*82, 94*83, 94*84, 94*85, 94*86,
michael@0 407 94*87, 94*88, 94*89, 94*90, 94*91, 94*92, 94*93, 0xFFFD,
michael@0 408 };
michael@0 409 static const uint8_t sbIdx[256] =
michael@0 410 {
michael@0 411 /* 0x0X */
michael@0 412 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 413 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 414 /* 0x1X */
michael@0 415 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 416 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 417 /* 0x2X */
michael@0 418 0xFF, 0, 1, 2, 3, 4, 5, 6,
michael@0 419 7, 8 , 9, 10, 11, 12, 13, 14,
michael@0 420 /* 0x3X */
michael@0 421 15, 16, 17, 18, 19, 20, 21, 22,
michael@0 422 23, 24, 25, 26, 27, 28, 29, 30,
michael@0 423 /* 0x4X */
michael@0 424 31, 32, 33, 34, 35, 36, 37, 38,
michael@0 425 39, 40, 41, 42, 43, 44, 45, 46,
michael@0 426 /* 0x5X */
michael@0 427 47, 48, 49, 50, 51, 52, 53, 54,
michael@0 428 55, 56, 57, 58, 59, 60, 61, 62,
michael@0 429 /* 0x6X */
michael@0 430 63, 64, 65, 66, 67, 68, 69, 70,
michael@0 431 71, 72, 73, 74, 75, 76, 77, 78,
michael@0 432 /* 0x7X */
michael@0 433 79, 80, 81, 82, 83, 84, 85, 86,
michael@0 434 87, 88, 89, 90, 91, 92, 93, 0xFF,
michael@0 435 /* 0x8X */
michael@0 436 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 437 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 438 /* 0x9X */
michael@0 439 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 440 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 441 /* 0xAX */
michael@0 442 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 443 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 444 /* 0xBX */
michael@0 445 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 446 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 447 /* 0xCX */
michael@0 448 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 449 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 450 /* 0xDX */
michael@0 451 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 452 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 453 /* 0xEX */
michael@0 454 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 455 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 456 /* 0xFX */
michael@0 457 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 458 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
michael@0 459 };
michael@0 460
michael@0 461 const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
michael@0 462 const unsigned char* src =(unsigned char*) aSrc;
michael@0 463 char16_t* destEnd = aDest + *aDestLen;
michael@0 464 char16_t* dest = aDest;
michael@0 465 while((src < srcEnd))
michael@0 466 {
michael@0 467
michael@0 468 switch(mState)
michael@0 469 {
michael@0 470 case mState_ASCII:
michael@0 471 if(0x1b == *src)
michael@0 472 {
michael@0 473 mLastLegalState = mState;
michael@0 474 mState = mState_ESC;
michael@0 475 } else if(*src & 0x80) {
michael@0 476 if (mErrBehavior == kOnError_Signal)
michael@0 477 goto error3;
michael@0 478 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 479 goto error1;
michael@0 480 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 481 } else {
michael@0 482 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 483 goto error1;
michael@0 484 *dest++ = (char16_t) *src;
michael@0 485 }
michael@0 486 break;
michael@0 487
michael@0 488 case mState_ESC:
michael@0 489 if( '(' == *src) {
michael@0 490 mState = mState_ESC_28;
michael@0 491 } else if ('$' == *src) {
michael@0 492 mState = mState_ESC_24;
michael@0 493 } else if ('.' == *src) { // for ISO-2022-JP-2
michael@0 494 mState = mState_ESC_2e;
michael@0 495 } else if ('N' == *src) { // for ISO-2022-JP-2
michael@0 496 mState = mState_ESC_4e;
michael@0 497 } else {
michael@0 498 if (CHECK_OVERRUN(dest, destEnd, 2))
michael@0 499 goto error1;
michael@0 500 *dest++ = (char16_t) 0x1b;
michael@0 501 if (0x80 & *src) {
michael@0 502 if (mErrBehavior == kOnError_Signal)
michael@0 503 goto error3;
michael@0 504 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 505 } else {
michael@0 506 *dest++ = (char16_t) *src;
michael@0 507 }
michael@0 508 mState = mLastLegalState;
michael@0 509 }
michael@0 510 break;
michael@0 511
michael@0 512 case mState_ESC_28: // ESC (
michael@0 513 if( 'B' == *src) {
michael@0 514 mState = mState_ASCII;
michael@0 515 if (mRunLength == 0) {
michael@0 516 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 517 goto error1;
michael@0 518 *dest++ = 0xFFFD;
michael@0 519 }
michael@0 520 mRunLength = 0;
michael@0 521 } else if ('J' == *src) {
michael@0 522 mState = mState_JISX0201_1976Roman;
michael@0 523 if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
michael@0 524 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 525 goto error1;
michael@0 526 if (mErrBehavior == kOnError_Signal)
michael@0 527 goto error3;
michael@0 528 *dest++ = 0xFFFD;
michael@0 529 }
michael@0 530 mRunLength = 0;
michael@0 531 } else if ('I' == *src) {
michael@0 532 mState = mState_JISX0201_1976Kana;
michael@0 533 mRunLength = 0;
michael@0 534 } else {
michael@0 535 if (CHECK_OVERRUN(dest, destEnd, 3))
michael@0 536 goto error1;
michael@0 537 *dest++ = (char16_t) 0x1b;
michael@0 538 *dest++ = (char16_t) '(';
michael@0 539 if (0x80 & *src) {
michael@0 540 if (mErrBehavior == kOnError_Signal)
michael@0 541 goto error3;
michael@0 542 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 543 } else {
michael@0 544 *dest++ = (char16_t) *src;
michael@0 545 }
michael@0 546 mState = mLastLegalState;
michael@0 547 }
michael@0 548 break;
michael@0 549
michael@0 550 case mState_ESC_24: // ESC $
michael@0 551 if( '@' == *src) {
michael@0 552 mState = mState_JISX0208_1978;
michael@0 553 mRunLength = 0;
michael@0 554 } else if ('A' == *src) {
michael@0 555 mState = mState_GB2312_1980;
michael@0 556 mRunLength = 0;
michael@0 557 } else if ('B' == *src) {
michael@0 558 mState = mState_JISX0208_1983;
michael@0 559 mRunLength = 0;
michael@0 560 } else if ('(' == *src) {
michael@0 561 mState = mState_ESC_24_28;
michael@0 562 } else {
michael@0 563 if (CHECK_OVERRUN(dest, destEnd, 3))
michael@0 564 goto error1;
michael@0 565 *dest++ = (char16_t) 0x1b;
michael@0 566 *dest++ = (char16_t) '$';
michael@0 567 if (0x80 & *src) {
michael@0 568 if (mErrBehavior == kOnError_Signal)
michael@0 569 goto error3;
michael@0 570 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 571 } else {
michael@0 572 *dest++ = (char16_t) *src;
michael@0 573 }
michael@0 574 mState = mLastLegalState;
michael@0 575 }
michael@0 576 break;
michael@0 577
michael@0 578 case mState_ESC_24_28: // ESC $ (
michael@0 579 if( 'C' == *src) {
michael@0 580 mState = mState_KSC5601_1987;
michael@0 581 mRunLength = 0;
michael@0 582 } else if ('D' == *src) {
michael@0 583 mState = mState_JISX0212_1990;
michael@0 584 mRunLength = 0;
michael@0 585 } else {
michael@0 586 if (CHECK_OVERRUN(dest, destEnd, 4))
michael@0 587 goto error1;
michael@0 588 *dest++ = (char16_t) 0x1b;
michael@0 589 *dest++ = (char16_t) '$';
michael@0 590 *dest++ = (char16_t) '(';
michael@0 591 if (0x80 & *src) {
michael@0 592 if (mErrBehavior == kOnError_Signal)
michael@0 593 goto error3;
michael@0 594 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 595 } else {
michael@0 596 *dest++ = (char16_t) *src;
michael@0 597 }
michael@0 598 mState = mLastLegalState;
michael@0 599 }
michael@0 600 break;
michael@0 601
michael@0 602 case mState_JISX0201_1976Roman:
michael@0 603 if(0x1b == *src) {
michael@0 604 mLastLegalState = mState;
michael@0 605 mState = mState_ESC;
michael@0 606 } else if(*src & 0x80) {
michael@0 607 if (mErrBehavior == kOnError_Signal)
michael@0 608 goto error3;
michael@0 609 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 610 goto error1;
michael@0 611 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 612 ++mRunLength;
michael@0 613 } else {
michael@0 614 // XXX We need to decide how to handle \ and ~ here
michael@0 615 // we may need a if statement here for '\' and '~'
michael@0 616 // to map them to Yen and Overbar
michael@0 617 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 618 goto error1;
michael@0 619 *dest++ = (char16_t) *src;
michael@0 620 ++mRunLength;
michael@0 621 }
michael@0 622 break;
michael@0 623
michael@0 624 case mState_JISX0201_1976Kana:
michael@0 625 if(0x1b == *src) {
michael@0 626 mLastLegalState = mState;
michael@0 627 mState = mState_ESC;
michael@0 628 } else {
michael@0 629 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 630 goto error1;
michael@0 631 if((0x21 <= *src) && (*src <= 0x5F)) {
michael@0 632 *dest++ = (0xFF61-0x0021) + *src;
michael@0 633 } else {
michael@0 634 if (mErrBehavior == kOnError_Signal)
michael@0 635 goto error3;
michael@0 636 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 637 }
michael@0 638 ++mRunLength;
michael@0 639 }
michael@0 640 break;
michael@0 641
michael@0 642 case mState_JISX0208_1978:
michael@0 643 if(0x1b == *src) {
michael@0 644 mLastLegalState = mState;
michael@0 645 mState = mState_ESC;
michael@0 646 } else if(*src & 0x80) {
michael@0 647 mLastLegalState = mState;
michael@0 648 mState = mState_ERROR;
michael@0 649 } else {
michael@0 650 mData = JIS0208_INDEX[*src & 0x7F];
michael@0 651 if (0xFFFD == mData) {
michael@0 652 if (mErrBehavior == kOnError_Signal)
michael@0 653 goto error3;
michael@0 654 mState = mState_ERROR;
michael@0 655 } else {
michael@0 656 mState = mState_JISX0208_1978_2ndbyte;
michael@0 657 }
michael@0 658 }
michael@0 659 break;
michael@0 660
michael@0 661 case mState_GB2312_1980:
michael@0 662 if(0x1b == *src) {
michael@0 663 mLastLegalState = mState;
michael@0 664 mState = mState_ESC;
michael@0 665 } else if(*src & 0x80) {
michael@0 666 mLastLegalState = mState;
michael@0 667 mState = mState_ERROR;
michael@0 668 } else {
michael@0 669 mData = fbIdx[*src & 0x7F];
michael@0 670 if (0xFFFD == mData) {
michael@0 671 if (mErrBehavior == kOnError_Signal)
michael@0 672 goto error3;
michael@0 673 mState = mState_ERROR;
michael@0 674 } else {
michael@0 675 mState = mState_GB2312_1980_2ndbyte;
michael@0 676 }
michael@0 677 }
michael@0 678 break;
michael@0 679
michael@0 680 case mState_JISX0208_1983:
michael@0 681 if(0x1b == *src) {
michael@0 682 mLastLegalState = mState;
michael@0 683 mState = mState_ESC;
michael@0 684 } else if(*src & 0x80) {
michael@0 685 mLastLegalState = mState;
michael@0 686 mState = mState_ERROR;
michael@0 687 } else {
michael@0 688 mData = JIS0208_INDEX[*src & 0x7F];
michael@0 689 if (0xFFFD == mData) {
michael@0 690 if (mErrBehavior == kOnError_Signal)
michael@0 691 goto error3;
michael@0 692 mState = mState_ERROR;
michael@0 693 } else {
michael@0 694 mState = mState_JISX0208_1983_2ndbyte;
michael@0 695 }
michael@0 696 }
michael@0 697 break;
michael@0 698
michael@0 699 case mState_KSC5601_1987:
michael@0 700 if(0x1b == *src) {
michael@0 701 mLastLegalState = mState;
michael@0 702 mState = mState_ESC;
michael@0 703 } else if(*src & 0x80) {
michael@0 704 mLastLegalState = mState;
michael@0 705 mState = mState_ERROR;
michael@0 706 } else {
michael@0 707 mData = fbIdx[*src & 0x7F];
michael@0 708 if (0xFFFD == mData) {
michael@0 709 if (mErrBehavior == kOnError_Signal)
michael@0 710 goto error3;
michael@0 711 mState = mState_ERROR;
michael@0 712 } else {
michael@0 713 mState = mState_KSC5601_1987_2ndbyte;
michael@0 714 }
michael@0 715 }
michael@0 716 break;
michael@0 717
michael@0 718 case mState_JISX0212_1990:
michael@0 719 if(0x1b == *src) {
michael@0 720 mLastLegalState = mState;
michael@0 721 mState = mState_ESC;
michael@0 722 } else if(*src & 0x80) {
michael@0 723 mLastLegalState = mState;
michael@0 724 mState = mState_ERROR;
michael@0 725 } else {
michael@0 726 mData = JIS0212_INDEX[*src & 0x7F];
michael@0 727 if (0xFFFD == mData) {
michael@0 728 if (mErrBehavior == kOnError_Signal)
michael@0 729 goto error3;
michael@0 730 mState = mState_ERROR;
michael@0 731 } else {
michael@0 732 mState = mState_JISX0212_1990_2ndbyte;
michael@0 733 }
michael@0 734 }
michael@0 735 break;
michael@0 736
michael@0 737 case mState_JISX0208_1978_2ndbyte:
michael@0 738 {
michael@0 739 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 740 goto error1;
michael@0 741 uint8_t off = sbIdx[*src];
michael@0 742 if(0xFF == off) {
michael@0 743 if (mErrBehavior == kOnError_Signal)
michael@0 744 goto error3;
michael@0 745 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 746 } else {
michael@0 747 // XXX We need to map from JIS X 0208 1983 to 1987
michael@0 748 // in the next line before pass to *dest++
michael@0 749 *dest++ = gJapaneseMap[mData+off];
michael@0 750 }
michael@0 751 ++mRunLength;
michael@0 752 mState = mState_JISX0208_1978;
michael@0 753 }
michael@0 754 break;
michael@0 755
michael@0 756 case mState_GB2312_1980_2ndbyte:
michael@0 757 {
michael@0 758 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 759 goto error1;
michael@0 760 uint8_t off = sbIdx[*src];
michael@0 761 if(0xFF == off) {
michael@0 762 if (mErrBehavior == kOnError_Signal)
michael@0 763 goto error3;
michael@0 764 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 765 } else {
michael@0 766 if (!mGB2312Decoder) {
michael@0 767 // creating a delegate converter (GB2312)
michael@0 768 nsresult rv;
michael@0 769 nsCOMPtr<nsICharsetConverterManager> ccm =
michael@0 770 do_GetService(kCharsetConverterManagerCID, &rv);
michael@0 771 if (NS_SUCCEEDED(rv)) {
michael@0 772 rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder);
michael@0 773 }
michael@0 774 }
michael@0 775 if (!mGB2312Decoder) {// failed creating a delegate converter
michael@0 776 goto error2;
michael@0 777 } else {
michael@0 778 unsigned char gb[2];
michael@0 779 char16_t uni;
michael@0 780 int32_t gbLen = 2, uniLen = 1;
michael@0 781 // ((mData/94)+0x21) is the original 1st byte.
michael@0 782 // *src is the present 2nd byte.
michael@0 783 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
michael@0 784 gb[0] = ((mData / 94) + 0x21) | 0x80;
michael@0 785 gb[1] = *src | 0x80;
michael@0 786 // Convert GB2312 to unicode.
michael@0 787 mGB2312Decoder->Convert((const char *)gb, &gbLen,
michael@0 788 &uni, &uniLen);
michael@0 789 *dest++ = uni;
michael@0 790 }
michael@0 791 }
michael@0 792 ++mRunLength;
michael@0 793 mState = mState_GB2312_1980;
michael@0 794 }
michael@0 795 break;
michael@0 796
michael@0 797 case mState_JISX0208_1983_2ndbyte:
michael@0 798 {
michael@0 799 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 800 goto error1;
michael@0 801 uint8_t off = sbIdx[*src];
michael@0 802 if(0xFF == off) {
michael@0 803 if (mErrBehavior == kOnError_Signal)
michael@0 804 goto error3;
michael@0 805 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 806 } else {
michael@0 807 *dest++ = gJapaneseMap[mData+off];
michael@0 808 }
michael@0 809 ++mRunLength;
michael@0 810 mState = mState_JISX0208_1983;
michael@0 811 }
michael@0 812 break;
michael@0 813
michael@0 814 case mState_KSC5601_1987_2ndbyte:
michael@0 815 {
michael@0 816 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 817 goto error1;
michael@0 818 uint8_t off = sbIdx[*src];
michael@0 819 if(0xFF == off) {
michael@0 820 if (mErrBehavior == kOnError_Signal)
michael@0 821 goto error3;
michael@0 822 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 823 } else {
michael@0 824 if (!mEUCKRDecoder) {
michael@0 825 // creating a delegate converter (EUC-KR)
michael@0 826 nsresult rv;
michael@0 827 nsCOMPtr<nsICharsetConverterManager> ccm =
michael@0 828 do_GetService(kCharsetConverterManagerCID, &rv);
michael@0 829 if (NS_SUCCEEDED(rv)) {
michael@0 830 rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
michael@0 831 }
michael@0 832 }
michael@0 833 if (!mEUCKRDecoder) {// failed creating a delegate converter
michael@0 834 goto error2;
michael@0 835 } else {
michael@0 836 unsigned char ksc[2];
michael@0 837 char16_t uni;
michael@0 838 int32_t kscLen = 2, uniLen = 1;
michael@0 839 // ((mData/94)+0x21) is the original 1st byte.
michael@0 840 // *src is the present 2nd byte.
michael@0 841 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
michael@0 842 ksc[0] = ((mData / 94) + 0x21) | 0x80;
michael@0 843 ksc[1] = *src | 0x80;
michael@0 844 // Convert EUC-KR to unicode.
michael@0 845 mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
michael@0 846 &uni, &uniLen);
michael@0 847 *dest++ = uni;
michael@0 848 }
michael@0 849 }
michael@0 850 ++mRunLength;
michael@0 851 mState = mState_KSC5601_1987;
michael@0 852 }
michael@0 853 break;
michael@0 854
michael@0 855 case mState_JISX0212_1990_2ndbyte:
michael@0 856 {
michael@0 857 uint8_t off = sbIdx[*src];
michael@0 858 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 859 goto error1;
michael@0 860 if(0xFF == off) {
michael@0 861 if (mErrBehavior == kOnError_Signal)
michael@0 862 goto error3;
michael@0 863 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 864 } else {
michael@0 865 *dest++ = gJapaneseMap[mData+off];
michael@0 866 }
michael@0 867 ++mRunLength;
michael@0 868 mState = mState_JISX0212_1990;
michael@0 869 }
michael@0 870 break;
michael@0 871
michael@0 872 case mState_ESC_2e: // ESC .
michael@0 873 // "ESC ." will designate 96 character set to G2.
michael@0 874 mState = mLastLegalState;
michael@0 875 if( 'A' == *src) {
michael@0 876 G2charset = G2_ISO88591;
michael@0 877 } else if ('F' == *src) {
michael@0 878 G2charset = G2_ISO88597;
michael@0 879 } else {
michael@0 880 if (CHECK_OVERRUN(dest, destEnd, 3))
michael@0 881 goto error1;
michael@0 882 *dest++ = (char16_t) 0x1b;
michael@0 883 *dest++ = (char16_t) '.';
michael@0 884 if (0x80 & *src) {
michael@0 885 if (mErrBehavior == kOnError_Signal)
michael@0 886 goto error3;
michael@0 887 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 888 } else {
michael@0 889 *dest++ = (char16_t) *src;
michael@0 890 }
michael@0 891 }
michael@0 892 break;
michael@0 893
michael@0 894 case mState_ESC_4e: // ESC N
michael@0 895 // "ESC N" is the SS2 sequence, that invoke a G2 designated
michael@0 896 // character set. Since SS2 is effective only for next one
michael@0 897 // character, mState should be returned to the last status.
michael@0 898 mState = mLastLegalState;
michael@0 899 if((0x20 <= *src) && (*src <= 0x7F)) {
michael@0 900 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 901 goto error1;
michael@0 902 if (G2_ISO88591 == G2charset) {
michael@0 903 *dest++ = *src | 0x80;
michael@0 904 } else if (G2_ISO88597 == G2charset) {
michael@0 905 if (!mISO88597Decoder) {
michael@0 906 // creating a delegate converter (ISO-8859-7)
michael@0 907 nsresult rv;
michael@0 908 nsCOMPtr<nsICharsetConverterManager> ccm =
michael@0 909 do_GetService(kCharsetConverterManagerCID, &rv);
michael@0 910 if (NS_SUCCEEDED(rv)) {
michael@0 911 rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder);
michael@0 912 }
michael@0 913 }
michael@0 914 if (!mISO88597Decoder) {// failed creating a delegate converter
michael@0 915 goto error2;
michael@0 916 } else {
michael@0 917 // Put one character with ISO-8859-7 encoding.
michael@0 918 unsigned char gr = *src | 0x80;
michael@0 919 char16_t uni;
michael@0 920 int32_t grLen = 1, uniLen = 1;
michael@0 921 // Convert ISO-8859-7 to unicode.
michael@0 922 mISO88597Decoder->Convert((const char *)&gr, &grLen,
michael@0 923 &uni, &uniLen);
michael@0 924 *dest++ = uni;
michael@0 925 }
michael@0 926 } else {// G2charset is G2_unknown (not designated yet)
michael@0 927 if (mErrBehavior == kOnError_Signal)
michael@0 928 goto error3;
michael@0 929 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 930 }
michael@0 931 ++mRunLength;
michael@0 932 } else {
michael@0 933 if (CHECK_OVERRUN(dest, destEnd, 3))
michael@0 934 goto error1;
michael@0 935 *dest++ = (char16_t) 0x1b;
michael@0 936 *dest++ = (char16_t) 'N';
michael@0 937 if (0x80 & *src) {
michael@0 938 if (mErrBehavior == kOnError_Signal)
michael@0 939 goto error3;
michael@0 940 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 941 } else {
michael@0 942 *dest++ = (char16_t) *src;
michael@0 943 }
michael@0 944 }
michael@0 945 break;
michael@0 946
michael@0 947 case mState_ERROR:
michael@0 948 mState = mLastLegalState;
michael@0 949 if (mErrBehavior == kOnError_Signal) {
michael@0 950 mRunLength = 0;
michael@0 951 goto error3;
michael@0 952 }
michael@0 953 if (CHECK_OVERRUN(dest, destEnd, 1))
michael@0 954 goto error1;
michael@0 955 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
michael@0 956 ++mRunLength;
michael@0 957 break;
michael@0 958
michael@0 959 } // switch
michael@0 960 src++;
michael@0 961 }
michael@0 962 *aDestLen = dest - aDest;
michael@0 963 return NS_OK;
michael@0 964 error1:
michael@0 965 *aDestLen = dest - aDest;
michael@0 966 *aSrcLen = src - (const unsigned char*)aSrc;
michael@0 967 return NS_OK_UDEC_MOREOUTPUT;
michael@0 968 error2:
michael@0 969 *aDestLen = dest - aDest;
michael@0 970 *aSrcLen = src - (const unsigned char*)aSrc;
michael@0 971 return NS_ERROR_UNEXPECTED;
michael@0 972 error3:
michael@0 973 *aDestLen = dest - aDest;
michael@0 974 *aSrcLen = src - (const unsigned char*)aSrc;
michael@0 975 return NS_ERROR_ILLEGAL_INPUT;
michael@0 976 }

mercurial