Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | #include "nsJapaneseToUnicode.h" |
michael@0 | 6 | |
michael@0 | 7 | #include "nsUCSupport.h" |
michael@0 | 8 | |
michael@0 | 9 | #include "japanese.map" |
michael@0 | 10 | |
michael@0 | 11 | #include "nsICharsetConverterManager.h" |
michael@0 | 12 | #include "nsServiceManagerUtils.h" |
michael@0 | 13 | |
michael@0 | 14 | #include "mozilla/Assertions.h" |
michael@0 | 15 | |
michael@0 | 16 | // HTML5 says to use Windows-31J instead of the real Shift_JIS for decoding, so the lead-byte indexes come from gCP932Index |
michael@0 | 17 | #define SJIS_INDEX gCP932Index[0] /* lead-byte index used by nsShiftJISToUnicode::Convert */ |
michael@0 | 18 | #define JIS0208_INDEX gCP932Index[1] /* lead-byte index used by the EUC-JP and ISO-2022-JP decoders */ |
michael@0 | 19 | |
michael@0 | 20 | #define JIS0212_INDEX gJIS0212Index /* lead-byte index for JIS X 0212 pairs */ |
michael@0 | 21 | #define SJIS_UNMAPPED 0x30fb /* substituted by the Shift_JIS decoder when a well-formed pair maps to 0xfffd */ |
michael@0 | 22 | #define UNICODE_REPLACEMENT_CHARACTER 0xfffd /* emitted for malformed input when errors are not signalled */ |
michael@0 | 23 | #define IN_GR_RANGE(b) /* true when byte b lies in 0xa1..0xfe inclusive */ \ |
michael@0 | 24 | ((uint8_t(0xa1) <= uint8_t(b)) && (uint8_t(b) <= uint8_t(0xfe))) |
michael@0 | 25 | /* Shift_JIS decoder: converts the *aSrcLen bytes at aSrc into UTF-16 code units at aDest (capacity *aDestLen). On every exit *aDestLen is set to the number of units written. mState and mData are not locals, so a pair split across two calls is presumably resumed by the next call (TODO confirm they are members). */ |
michael@0 | 26 | NS_IMETHODIMP nsShiftJISToUnicode::Convert( |
michael@0 | 27 | const char * aSrc, int32_t * aSrcLen, /* source bytes; byte count in, rewritten only on the illegal-input and more-output exits */ |
michael@0 | 28 | char16_t * aDest, int32_t * aDestLen) /* output buffer; capacity in, units written out */ |
michael@0 | 29 | { |
michael@0 | 30 | static const uint8_t sbIdx[256] = /* trail byte -> offset added to mData; 0xFF marks a byte that cannot be a trail byte */ |
michael@0 | 31 | { |
michael@0 | 32 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00 */ |
michael@0 | 33 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08 */ |
michael@0 | 34 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10 */ |
michael@0 | 35 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18 */ |
michael@0 | 36 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20 */ |
michael@0 | 37 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x28 */ |
michael@0 | 38 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x30 */ |
michael@0 | 39 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38 */ |
michael@0 | 40 | 0, 1, 2, 3, 4, 5, 6, 7, /* 0x40 */ |
michael@0 | 41 | 8, 9, 10, 11, 12, 13, 14, 15, /* 0x48 */ |
michael@0 | 42 | 16, 17, 18, 19, 20, 21, 22, 23, /* 0x50 */ |
michael@0 | 43 | 24, 25, 26, 27, 28, 29, 30, 31, /* 0x58 */ |
michael@0 | 44 | 32, 33, 34, 35, 36, 37, 38, 39, /* 0x60 */ |
michael@0 | 45 | 40, 41, 42, 43, 44, 45, 46, 47, /* 0x68 */ |
michael@0 | 46 | 48, 49, 50, 51, 52, 53, 54, 55, /* 0x70 */ |
michael@0 | 47 | 56, 57, 58, 59, 60, 61, 62, 0xFF, /* 0x78 */ |
michael@0 | 48 | 63, 64, 65, 66, 67, 68, 69, 70, /* 0x80 */ |
michael@0 | 49 | 71, 72, 73, 74, 75, 76, 77, 78, /* 0x88 */ |
michael@0 | 50 | 79, 80, 81, 82, 83, 84, 85, 86, /* 0x90 */ |
michael@0 | 51 | 87, 88, 89, 90, 91, 92, 93, 94, /* 0x98 */ |
michael@0 | 52 | 95, 96, 97, 98, 99, 100, 101, 102, /* 0xa0 */ |
michael@0 | 53 | 103, 104, 105, 106, 107, 108, 109, 110, /* 0xa8 */ |
michael@0 | 54 | 111, 112, 113, 114, 115, 116, 117, 118, /* 0xb0 */ |
michael@0 | 55 | 119, 120, 121, 122, 123, 124, 125, 126, /* 0xb8 */ |
michael@0 | 56 | 127, 128, 129, 130, 131, 132, 133, 134, /* 0xc0 */ |
michael@0 | 57 | 135, 136, 137, 138, 139, 140, 141, 142, /* 0xc8 */ |
michael@0 | 58 | 143, 144, 145, 146, 147, 148, 149, 150, /* 0xd0 */ |
michael@0 | 59 | 151, 152, 153, 154, 155, 156, 157, 158, /* 0xd8 */ |
michael@0 | 60 | 159, 160, 161, 162, 163, 164, 165, 166, /* 0xe0 */ |
michael@0 | 61 | 167, 168, 169, 170, 171, 172, 173, 174, /* 0xe8 */ |
michael@0 | 62 | 175, 176, 177, 178, 179, 180, 181, 182, /* 0xf0 */ |
michael@0 | 63 | 183, 184, 185, 186, 187, 0xFF, 0xFF, 0xFF, /* 0xf8 */ |
michael@0 | 64 | }; |
michael@0 | 65 | |
michael@0 | 66 | const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen; |
michael@0 | 67 | const unsigned char* src =(unsigned char*) aSrc; |
michael@0 | 68 | char16_t* destEnd = aDest + *aDestLen; |
michael@0 | 69 | char16_t* dest = aDest; |
michael@0 | 70 | while (src < srcEnd) { |
michael@0 | 71 | switch (mState) { /* 0: expecting a single or lead byte, 1: trail byte of a table-mapped pair, 2: trail byte of an EUDC pair */ |
michael@0 | 72 | case 0: |
michael@0 | 73 | if (*src <= 0x80) { |
michael@0 | 74 | // ASCII (byte 0x80 is also copied through unchanged by this branch) |
michael@0 | 75 | *dest++ = (char16_t) *src; |
michael@0 | 76 | if (dest >= destEnd) { /* NOTE(review): capacity is tested only after the store above, so one free slot is assumed on entry - confirm callers never pass *aDestLen == 0 */ |
michael@0 | 77 | goto error1; |
michael@0 | 78 | } |
michael@0 | 79 | } else { |
michael@0 | 80 | mData = SJIS_INDEX[*src & 0x7F]; /* the index value alone decides how this byte is treated */ |
michael@0 | 81 | if (mData < 0xE000) { |
michael@0 | 82 | mState = 1; // two bytes |
michael@0 | 83 | } else if (mData < 0xF000) { |
michael@0 | 84 | mState = 2; // EUDC |
michael@0 | 85 | } else { |
michael@0 | 86 | *dest++ = mData; // JIS 0201 |
michael@0 | 87 | if (dest >= destEnd) { |
michael@0 | 88 | goto error1; |
michael@0 | 89 | } |
michael@0 | 90 | } |
michael@0 | 91 | } |
michael@0 | 92 | break; |
michael@0 | 93 | |
michael@0 | 94 | case 1: // Index to table: trail byte of a pair whose row base is held in mData |
michael@0 | 95 | { |
michael@0 | 96 | MOZ_ASSERT(mData < 0xE000); |
michael@0 | 97 | uint8_t off = sbIdx[*src]; |
michael@0 | 98 | |
michael@0 | 99 | // Error handling: in the case where the second octet is not in the |
michael@0 | 100 | // valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and |
michael@0 | 101 | // interpret it as the ASCII value. In the case where the second |
michael@0 | 102 | // octet is in the valid range but there is no mapping for the |
michael@0 | 103 | // 2-octet sequence, do not unconsume. |
michael@0 | 104 | if(0xFF == off) { |
michael@0 | 105 | src--; /* undone by the src++ at the bottom of the loop, so this byte is re-read in state 0 */ |
michael@0 | 106 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 107 | goto error_invalidchar; /* NOTE(review): mState is still 1 on this exit, and *aSrcLen becomes -1 when this byte was the first byte of aSrc */ |
michael@0 | 108 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 109 | } else { |
michael@0 | 110 | char16_t ch = gJapaneseMap[mData+off]; |
michael@0 | 111 | if(ch == 0xfffd) { |
michael@0 | 112 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 113 | goto error_invalidchar; |
michael@0 | 114 | ch = SJIS_UNMAPPED; |
michael@0 | 115 | } |
michael@0 | 116 | *dest++ = ch; |
michael@0 | 117 | } |
michael@0 | 118 | mState = 0; |
michael@0 | 119 | if(dest >= destEnd) |
michael@0 | 120 | goto error1; |
michael@0 | 121 | } |
michael@0 | 122 | break; |
michael@0 | 123 | |
michael@0 | 124 | case 2: // EUDC: the output unit is the base in mData plus the trail-byte offset |
michael@0 | 125 | { |
michael@0 | 126 | MOZ_ASSERT(0xE000 <= mData && mData < 0xF000); |
michael@0 | 127 | uint8_t off = sbIdx[*src]; |
michael@0 | 128 | |
michael@0 | 129 | // Error handling as in case 1 |
michael@0 | 130 | if(0xFF == off) { |
michael@0 | 131 | src--; |
michael@0 | 132 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 133 | goto error_invalidchar; |
michael@0 | 134 | |
michael@0 | 135 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 136 | } else { |
michael@0 | 137 | *dest++ = mData + off; |
michael@0 | 138 | } |
michael@0 | 139 | mState = 0; |
michael@0 | 140 | if(dest >= destEnd) |
michael@0 | 141 | goto error1; |
michael@0 | 142 | } |
michael@0 | 143 | break; |
michael@0 | 144 | |
michael@0 | 145 | } |
michael@0 | 146 | src++; |
michael@0 | 147 | } |
michael@0 | 148 | *aDestLen = dest - aDest; /* normal completion: *aSrcLen is left as passed in */ |
michael@0 | 149 | return NS_OK; |
michael@0 | 150 | error_invalidchar: /* illegal input while signalling: report units written and the current src offset */ |
michael@0 | 151 | *aDestLen = dest - aDest; |
michael@0 | 152 | *aSrcLen = src - (const unsigned char*)aSrc; |
michael@0 | 153 | return NS_ERROR_ILLEGAL_INPUT; |
michael@0 | 154 | error1: /* output buffer filled by the store that preceded the jump */ |
michael@0 | 155 | *aDestLen = dest - aDest; |
michael@0 | 156 | src++; /* performs the loop increment that the jump skipped */ |
michael@0 | 157 | if ((mState == 0) && (src == srcEnd)) { /* all input used and no pair pending, so this is an ordinary success */ |
michael@0 | 158 | return NS_OK; |
michael@0 | 159 | } |
michael@0 | 160 | *aSrcLen = src - (const unsigned char*)aSrc; |
michael@0 | 161 | return NS_OK_UDEC_MOREOUTPUT; |
michael@0 | 162 | } |
michael@0 | 163 | /* Returns SJIS_UNMAPPED (0x30fb) as a char16_t, the same value nsShiftJISToUnicode::Convert stores for a pair that maps to 0xfffd when errors are not signalled. */ |
michael@0 | 164 | char16_t |
michael@0 | 165 | nsShiftJISToUnicode::GetCharacterForUnMapped() |
michael@0 | 166 | { |
michael@0 | 167 | return char16_t(SJIS_UNMAPPED); |
michael@0 | 168 | } |
michael@0 | 169 | /* EUC-JP decoder: converts the *aSrcLen bytes at aSrc into UTF-16 code units at aDest (capacity *aDestLen). On every exit *aDestLen is set to the number of units written. mState and mData are not locals, so a sequence split across two calls is presumably resumed by the next call (TODO confirm they are members). */ |
michael@0 | 170 | NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert( |
michael@0 | 171 | const char * aSrc, int32_t * aSrcLen, /* source bytes; byte count in, rewritten only on the illegal-input and more-output exits */ |
michael@0 | 172 | char16_t * aDest, int32_t * aDestLen) /* output buffer; capacity in, units written out */ |
michael@0 | 173 | { |
michael@0 | 174 | static const uint8_t sbIdx[256] = /* trail byte -> offset added to mData; only 0xA1..0xFE have an offset, 0xFF marks every other byte as invalid */ |
michael@0 | 175 | { |
michael@0 | 176 | /* 0x0X */ |
michael@0 | 177 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 178 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 179 | /* 0x1X */ |
michael@0 | 180 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 181 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 182 | /* 0x2X */ |
michael@0 | 183 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 184 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 185 | /* 0x3X */ |
michael@0 | 186 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 187 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 188 | /* 0x4X */ |
michael@0 | 189 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 190 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 191 | /* 0x5X */ |
michael@0 | 192 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 193 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 194 | /* 0x6X */ |
michael@0 | 195 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 196 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 197 | /* 0x7X */ |
michael@0 | 198 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 199 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 200 | /* 0x8X */ |
michael@0 | 201 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 202 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 203 | /* 0x9X */ |
michael@0 | 204 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 205 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 206 | /* 0xAX */ |
michael@0 | 207 | 0xFF, 0, 1, 2, 3, 4, 5, 6, |
michael@0 | 208 | 7, 8 , 9, 10, 11, 12, 13, 14, |
michael@0 | 209 | /* 0xBX */ |
michael@0 | 210 | 15, 16, 17, 18, 19, 20, 21, 22, |
michael@0 | 211 | 23, 24, 25, 26, 27, 28, 29, 30, |
michael@0 | 212 | /* 0xCX */ |
michael@0 | 213 | 31, 32, 33, 34, 35, 36, 37, 38, |
michael@0 | 214 | 39, 40, 41, 42, 43, 44, 45, 46, |
michael@0 | 215 | /* 0xDX */ |
michael@0 | 216 | 47, 48, 49, 50, 51, 52, 53, 54, |
michael@0 | 217 | 55, 56, 57, 58, 59, 60, 61, 62, |
michael@0 | 218 | /* 0xEX */ |
michael@0 | 219 | 63, 64, 65, 66, 67, 68, 69, 70, |
michael@0 | 220 | 71, 72, 73, 74, 75, 76, 77, 78, |
michael@0 | 221 | /* 0xFX */ |
michael@0 | 222 | 79, 80, 81, 82, 83, 84, 85, 86, |
michael@0 | 223 | 87, 88, 89, 90, 91, 92, 93, 0xFF, |
michael@0 | 224 | }; |
michael@0 | 225 | |
michael@0 | 226 | const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen; |
michael@0 | 227 | const unsigned char* src =(unsigned char*) aSrc; |
michael@0 | 228 | char16_t* destEnd = aDest + *aDestLen; |
michael@0 | 229 | char16_t* dest = aDest; |
michael@0 | 230 | while((src < srcEnd)) |
michael@0 | 231 | { |
michael@0 | 232 | switch(mState) /* 0: single or lead byte, 1: JIS 0208 trail byte, 2: byte after 0x8e, 3: first byte after 0x8f, 4: JIS 0212 trail byte, 5: JIS 0212 pair whose first byte had no index entry */ |
michael@0 | 233 | { |
michael@0 | 234 | case 0: |
michael@0 | 235 | if(*src & 0x80 && *src != (unsigned char)0xa0) |
michael@0 | 236 | { |
michael@0 | 237 | mData = JIS0208_INDEX[*src & 0x7F]; |
michael@0 | 238 | if(mData != 0xFFFD ) |
michael@0 | 239 | { |
michael@0 | 240 | mState = 1; // two byte JIS0208 |
michael@0 | 241 | } else { |
michael@0 | 242 | if( 0x8e == *src) { |
michael@0 | 243 | // JIS 0201 |
michael@0 | 244 | mState = 2; // JIS0201 |
michael@0 | 245 | } else if(0x8f == *src) { |
michael@0 | 246 | // JIS 0212 |
michael@0 | 247 | mState = 3; // JIS0212 |
michael@0 | 248 | } else { |
michael@0 | 249 | // others: a high-bit byte with no JIS 0208 index entry that is neither 0x8e nor 0x8f |
michael@0 | 250 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 251 | goto error_invalidchar; |
michael@0 | 252 | *dest++ = 0xFFFD; |
michael@0 | 253 | if(dest >= destEnd) /* NOTE(review): capacity is tested only after the store above, here and in every other state, so one free slot is assumed on entry - confirm callers never pass *aDestLen == 0 */ |
michael@0 | 254 | goto error1; |
michael@0 | 255 | } |
michael@0 | 256 | } |
michael@0 | 257 | } else { |
michael@0 | 258 | // ASCII (byte 0xa0 is excluded by the test above, so it also lands here and is copied through unchanged) |
michael@0 | 259 | *dest++ = (char16_t) *src; |
michael@0 | 260 | if(dest >= destEnd) |
michael@0 | 261 | goto error1; |
michael@0 | 262 | } |
michael@0 | 263 | break; |
michael@0 | 264 | |
michael@0 | 265 | case 1: // Index to table: trail byte of a pair whose row base is held in mData |
michael@0 | 266 | { |
michael@0 | 267 | uint8_t off = sbIdx[*src]; |
michael@0 | 268 | if(0xFF == off) { |
michael@0 | 269 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 270 | goto error_invalidchar; /* NOTE(review): mState is still 1 on this exit */ |
michael@0 | 271 | *dest++ = 0xFFFD; |
michael@0 | 272 | // if the first byte is valid for EUC-JP but the second |
michael@0 | 273 | // is not while being a valid US-ASCII, save it |
michael@0 | 274 | // instead of eating it up ! |
michael@0 | 275 | if ( (uint8_t)*src < (uint8_t)0x7f ) |
michael@0 | 276 | --src; /* undone by the src++ at the bottom of the loop, so this byte is re-read in state 0 */ |
michael@0 | 277 | } else { |
michael@0 | 278 | *dest++ = gJapaneseMap[mData+off]; |
michael@0 | 279 | } |
michael@0 | 280 | mState = 0; |
michael@0 | 281 | if(dest >= destEnd) |
michael@0 | 282 | goto error1; |
michael@0 | 283 | } |
michael@0 | 284 | break; |
michael@0 | 285 | |
michael@0 | 286 | case 2: // JIS 0201: the byte following 0x8e |
michael@0 | 287 | { |
michael@0 | 288 | if((0xA1 <= *src) && (*src <= 0xDF)) { |
michael@0 | 289 | *dest++ = (0xFF61-0x00A1) + *src; /* bytes 0xA1..0xDF map linearly onto 0xFF61..0xFF9F */ |
michael@0 | 290 | } else { |
michael@0 | 291 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 292 | goto error_invalidchar; |
michael@0 | 293 | *dest++ = 0xFFFD; |
michael@0 | 294 | // if 0x8e is not followed by a valid JIS X 0201 byte |
michael@0 | 295 | // but by a valid US-ASCII, save it instead of eating it up. |
michael@0 | 296 | if ( (uint8_t)*src < (uint8_t)0x7f ) |
michael@0 | 297 | --src; |
michael@0 | 298 | } |
michael@0 | 299 | mState = 0; |
michael@0 | 300 | if(dest >= destEnd) |
michael@0 | 301 | goto error1; |
michael@0 | 302 | } |
michael@0 | 303 | break; |
michael@0 | 304 | |
michael@0 | 305 | case 3: // JIS 0212: the first byte following 0x8f |
michael@0 | 306 | { |
michael@0 | 307 | if (IN_GR_RANGE(*src)) |
michael@0 | 308 | { |
michael@0 | 309 | mData = JIS0212_INDEX[*src & 0x7F]; |
michael@0 | 310 | if(mData != 0xFFFD ) |
michael@0 | 311 | { |
michael@0 | 312 | mState = 4; |
michael@0 | 313 | } else { |
michael@0 | 314 | mState = 5; // error |
michael@0 | 315 | } |
michael@0 | 316 | } else { |
michael@0 | 317 | // First "JIS 0212" byte is not in the valid GR range: save it |
michael@0 | 318 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 319 | goto error_invalidchar; |
michael@0 | 320 | *dest++ = 0xFFFD; |
michael@0 | 321 | --src; |
michael@0 | 322 | mState = 0; |
michael@0 | 323 | if(dest >= destEnd) |
michael@0 | 324 | goto error1; |
michael@0 | 325 | } |
michael@0 | 326 | } |
michael@0 | 327 | break; |
michael@0 | 328 | case 4: /* JIS 0212 trail byte; mData holds the row base chosen in state 3 */ |
michael@0 | 329 | { |
michael@0 | 330 | uint8_t off = sbIdx[*src]; |
michael@0 | 331 | if(0xFF != off) { |
michael@0 | 332 | *dest++ = gJapaneseMap[mData+off]; |
michael@0 | 333 | mState = 0; |
michael@0 | 334 | if(dest >= destEnd) |
michael@0 | 335 | goto error1; |
michael@0 | 336 | break; |
michael@0 | 337 | } |
michael@0 | 338 | // else fall through to error handler |
michael@0 | 339 | } |
michael@0 | 340 | case 5: // two bytes undefined |
michael@0 | 341 | { |
michael@0 | 342 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 343 | goto error_invalidchar; |
michael@0 | 344 | *dest++ = 0xFFFD; |
michael@0 | 345 | // Undefined JIS 0212 two byte sequence. If the second byte is in |
michael@0 | 346 | // the valid range for a two byte sequence (0xa1 - 0xfe) consume |
michael@0 | 347 | // both bytes. Otherwise resynchronize on the second byte. |
michael@0 | 348 | if (!IN_GR_RANGE(*src)) |
michael@0 | 349 | --src; |
michael@0 | 350 | mState = 0; |
michael@0 | 351 | if(dest >= destEnd) |
michael@0 | 352 | goto error1; |
michael@0 | 353 | } |
michael@0 | 354 | break; |
michael@0 | 355 | } |
michael@0 | 356 | src++; |
michael@0 | 357 | } |
michael@0 | 358 | *aDestLen = dest - aDest; /* normal completion: *aSrcLen is left as passed in */ |
michael@0 | 359 | return NS_OK; |
michael@0 | 360 | error_invalidchar: /* illegal input while signalling: report units written and the current src offset */ |
michael@0 | 361 | *aDestLen = dest - aDest; |
michael@0 | 362 | *aSrcLen = src - (const unsigned char*)aSrc; |
michael@0 | 363 | return NS_ERROR_ILLEGAL_INPUT; |
michael@0 | 364 | error1: /* output buffer filled by the store that preceded the jump */ |
michael@0 | 365 | *aDestLen = dest - aDest; |
michael@0 | 366 | src++; /* performs the loop increment that the jump skipped */ |
michael@0 | 367 | if ((mState == 0) && (src == srcEnd)) { /* all input used and no sequence pending, so this is an ordinary success */ |
michael@0 | 368 | return NS_OK; |
michael@0 | 369 | } |
michael@0 | 370 | *aSrcLen = src - (const unsigned char*)aSrc; |
michael@0 | 371 | return NS_OK_UDEC_MOREOUTPUT; |
michael@0 | 372 | } |
michael@0 | 373 | |
michael@0 | 374 | |
michael@0 | 375 | |
michael@0 | 376 | NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert( |
michael@0 | 377 | const char * aSrc, int32_t * aSrcLen, |
michael@0 | 378 | char16_t * aDest, int32_t * aDestLen) |
michael@0 | 379 | { |
michael@0 | 380 | static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); |
michael@0 | 381 | |
michael@0 | 382 | static const uint16_t fbIdx[128] = |
michael@0 | 383 | { |
michael@0 | 384 | /* 0x8X */ |
michael@0 | 385 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
michael@0 | 386 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
michael@0 | 387 | /* 0x9X */ |
michael@0 | 388 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
michael@0 | 389 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, |
michael@0 | 390 | /* 0xAX */ |
michael@0 | 391 | 0xFFFD, 0, 94, 94* 2, 94* 3, 94* 4, 94* 5, 94* 6, |
michael@0 | 392 | 94* 7, 94* 8 , 94* 9, 94*10, 94*11, 94*12, 94*13, 94*14, |
michael@0 | 393 | /* 0xBX */ |
michael@0 | 394 | 94*15, 94*16, 94*17, 94*18, 94*19, 94*20, 94*21, 94*22, |
michael@0 | 395 | 94*23, 94*24, 94*25, 94*26, 94*27, 94*28, 94*29, 94*30, |
michael@0 | 396 | /* 0xCX */ |
michael@0 | 397 | 94*31, 94*32, 94*33, 94*34, 94*35, 94*36, 94*37, 94*38, |
michael@0 | 398 | 94*39, 94*40, 94*41, 94*42, 94*43, 94*44, 94*45, 94*46, |
michael@0 | 399 | /* 0xDX */ |
michael@0 | 400 | 94*47, 94*48, 94*49, 94*50, 94*51, 94*52, 94*53, 94*54, |
michael@0 | 401 | 94*55, 94*56, 94*57, 94*58, 94*59, 94*60, 94*61, 94*62, |
michael@0 | 402 | /* 0xEX */ |
michael@0 | 403 | 94*63, 94*64, 94*65, 94*66, 94*67, 94*68, 94*69, 94*70, |
michael@0 | 404 | 94*71, 94*72, 94*73, 94*74, 94*75, 94*76, 94*77, 94*78, |
michael@0 | 405 | /* 0xFX */ |
michael@0 | 406 | 94*79, 94*80, 94*81, 94*82, 94*83, 94*84, 94*85, 94*86, |
michael@0 | 407 | 94*87, 94*88, 94*89, 94*90, 94*91, 94*92, 94*93, 0xFFFD, |
michael@0 | 408 | }; |
michael@0 | 409 | static const uint8_t sbIdx[256] = |
michael@0 | 410 | { |
michael@0 | 411 | /* 0x0X */ |
michael@0 | 412 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 413 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 414 | /* 0x1X */ |
michael@0 | 415 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 416 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 417 | /* 0x2X */ |
michael@0 | 418 | 0xFF, 0, 1, 2, 3, 4, 5, 6, |
michael@0 | 419 | 7, 8 , 9, 10, 11, 12, 13, 14, |
michael@0 | 420 | /* 0x3X */ |
michael@0 | 421 | 15, 16, 17, 18, 19, 20, 21, 22, |
michael@0 | 422 | 23, 24, 25, 26, 27, 28, 29, 30, |
michael@0 | 423 | /* 0x4X */ |
michael@0 | 424 | 31, 32, 33, 34, 35, 36, 37, 38, |
michael@0 | 425 | 39, 40, 41, 42, 43, 44, 45, 46, |
michael@0 | 426 | /* 0x5X */ |
michael@0 | 427 | 47, 48, 49, 50, 51, 52, 53, 54, |
michael@0 | 428 | 55, 56, 57, 58, 59, 60, 61, 62, |
michael@0 | 429 | /* 0x6X */ |
michael@0 | 430 | 63, 64, 65, 66, 67, 68, 69, 70, |
michael@0 | 431 | 71, 72, 73, 74, 75, 76, 77, 78, |
michael@0 | 432 | /* 0x7X */ |
michael@0 | 433 | 79, 80, 81, 82, 83, 84, 85, 86, |
michael@0 | 434 | 87, 88, 89, 90, 91, 92, 93, 0xFF, |
michael@0 | 435 | /* 0x8X */ |
michael@0 | 436 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 437 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 438 | /* 0x9X */ |
michael@0 | 439 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 440 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 441 | /* 0xAX */ |
michael@0 | 442 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 443 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 444 | /* 0xBX */ |
michael@0 | 445 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 446 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 447 | /* 0xCX */ |
michael@0 | 448 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 449 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 450 | /* 0xDX */ |
michael@0 | 451 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 452 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 453 | /* 0xEX */ |
michael@0 | 454 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 455 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 456 | /* 0xFX */ |
michael@0 | 457 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 458 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
michael@0 | 459 | }; |
michael@0 | 460 | |
michael@0 | 461 | const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen; |
michael@0 | 462 | const unsigned char* src =(unsigned char*) aSrc; |
michael@0 | 463 | char16_t* destEnd = aDest + *aDestLen; |
michael@0 | 464 | char16_t* dest = aDest; |
michael@0 | 465 | while((src < srcEnd)) |
michael@0 | 466 | { |
michael@0 | 467 | |
michael@0 | 468 | switch(mState) |
michael@0 | 469 | { |
michael@0 | 470 | case mState_ASCII: |
michael@0 | 471 | if(0x1b == *src) |
michael@0 | 472 | { |
michael@0 | 473 | mLastLegalState = mState; |
michael@0 | 474 | mState = mState_ESC; |
michael@0 | 475 | } else if(*src & 0x80) { |
michael@0 | 476 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 477 | goto error3; |
michael@0 | 478 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 479 | goto error1; |
michael@0 | 480 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 481 | } else { |
michael@0 | 482 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 483 | goto error1; |
michael@0 | 484 | *dest++ = (char16_t) *src; |
michael@0 | 485 | } |
michael@0 | 486 | break; |
michael@0 | 487 | |
michael@0 | 488 | case mState_ESC: |
michael@0 | 489 | if( '(' == *src) { |
michael@0 | 490 | mState = mState_ESC_28; |
michael@0 | 491 | } else if ('$' == *src) { |
michael@0 | 492 | mState = mState_ESC_24; |
michael@0 | 493 | } else if ('.' == *src) { // for ISO-2022-JP-2 |
michael@0 | 494 | mState = mState_ESC_2e; |
michael@0 | 495 | } else if ('N' == *src) { // for ISO-2022-JP-2 |
michael@0 | 496 | mState = mState_ESC_4e; |
michael@0 | 497 | } else { |
michael@0 | 498 | if (CHECK_OVERRUN(dest, destEnd, 2)) |
michael@0 | 499 | goto error1; |
michael@0 | 500 | *dest++ = (char16_t) 0x1b; |
michael@0 | 501 | if (0x80 & *src) { |
michael@0 | 502 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 503 | goto error3; |
michael@0 | 504 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 505 | } else { |
michael@0 | 506 | *dest++ = (char16_t) *src; |
michael@0 | 507 | } |
michael@0 | 508 | mState = mLastLegalState; |
michael@0 | 509 | } |
michael@0 | 510 | break; |
michael@0 | 511 | |
michael@0 | 512 | case mState_ESC_28: // ESC ( |
michael@0 | 513 | if( 'B' == *src) { |
michael@0 | 514 | mState = mState_ASCII; |
michael@0 | 515 | if (mRunLength == 0) { |
michael@0 | 516 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 517 | goto error1; |
michael@0 | 518 | *dest++ = 0xFFFD; |
michael@0 | 519 | } |
michael@0 | 520 | mRunLength = 0; |
michael@0 | 521 | } else if ('J' == *src) { |
michael@0 | 522 | mState = mState_JISX0201_1976Roman; |
michael@0 | 523 | if (mRunLength == 0 && mLastLegalState != mState_ASCII) { |
michael@0 | 524 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 525 | goto error1; |
michael@0 | 526 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 527 | goto error3; |
michael@0 | 528 | *dest++ = 0xFFFD; |
michael@0 | 529 | } |
michael@0 | 530 | mRunLength = 0; |
michael@0 | 531 | } else if ('I' == *src) { |
michael@0 | 532 | mState = mState_JISX0201_1976Kana; |
michael@0 | 533 | mRunLength = 0; |
michael@0 | 534 | } else { |
michael@0 | 535 | if (CHECK_OVERRUN(dest, destEnd, 3)) |
michael@0 | 536 | goto error1; |
michael@0 | 537 | *dest++ = (char16_t) 0x1b; |
michael@0 | 538 | *dest++ = (char16_t) '('; |
michael@0 | 539 | if (0x80 & *src) { |
michael@0 | 540 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 541 | goto error3; |
michael@0 | 542 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 543 | } else { |
michael@0 | 544 | *dest++ = (char16_t) *src; |
michael@0 | 545 | } |
michael@0 | 546 | mState = mLastLegalState; |
michael@0 | 547 | } |
michael@0 | 548 | break; |
michael@0 | 549 | |
michael@0 | 550 | case mState_ESC_24: // ESC $ |
michael@0 | 551 | if( '@' == *src) { |
michael@0 | 552 | mState = mState_JISX0208_1978; |
michael@0 | 553 | mRunLength = 0; |
michael@0 | 554 | } else if ('A' == *src) { |
michael@0 | 555 | mState = mState_GB2312_1980; |
michael@0 | 556 | mRunLength = 0; |
michael@0 | 557 | } else if ('B' == *src) { |
michael@0 | 558 | mState = mState_JISX0208_1983; |
michael@0 | 559 | mRunLength = 0; |
michael@0 | 560 | } else if ('(' == *src) { |
michael@0 | 561 | mState = mState_ESC_24_28; |
michael@0 | 562 | } else { |
michael@0 | 563 | if (CHECK_OVERRUN(dest, destEnd, 3)) |
michael@0 | 564 | goto error1; |
michael@0 | 565 | *dest++ = (char16_t) 0x1b; |
michael@0 | 566 | *dest++ = (char16_t) '$'; |
michael@0 | 567 | if (0x80 & *src) { |
michael@0 | 568 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 569 | goto error3; |
michael@0 | 570 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 571 | } else { |
michael@0 | 572 | *dest++ = (char16_t) *src; |
michael@0 | 573 | } |
michael@0 | 574 | mState = mLastLegalState; |
michael@0 | 575 | } |
michael@0 | 576 | break; |
michael@0 | 577 | |
michael@0 | 578 | case mState_ESC_24_28: // ESC $ ( |
michael@0 | 579 | if( 'C' == *src) { |
michael@0 | 580 | mState = mState_KSC5601_1987; |
michael@0 | 581 | mRunLength = 0; |
michael@0 | 582 | } else if ('D' == *src) { |
michael@0 | 583 | mState = mState_JISX0212_1990; |
michael@0 | 584 | mRunLength = 0; |
michael@0 | 585 | } else { |
michael@0 | 586 | if (CHECK_OVERRUN(dest, destEnd, 4)) |
michael@0 | 587 | goto error1; |
michael@0 | 588 | *dest++ = (char16_t) 0x1b; |
michael@0 | 589 | *dest++ = (char16_t) '$'; |
michael@0 | 590 | *dest++ = (char16_t) '('; |
michael@0 | 591 | if (0x80 & *src) { |
michael@0 | 592 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 593 | goto error3; |
michael@0 | 594 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 595 | } else { |
michael@0 | 596 | *dest++ = (char16_t) *src; |
michael@0 | 597 | } |
michael@0 | 598 | mState = mLastLegalState; |
michael@0 | 599 | } |
michael@0 | 600 | break; |
michael@0 | 601 | |
michael@0 | 602 | case mState_JISX0201_1976Roman: |
michael@0 | 603 | if(0x1b == *src) { |
michael@0 | 604 | mLastLegalState = mState; |
michael@0 | 605 | mState = mState_ESC; |
michael@0 | 606 | } else if(*src & 0x80) { |
michael@0 | 607 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 608 | goto error3; |
michael@0 | 609 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 610 | goto error1; |
michael@0 | 611 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 612 | ++mRunLength; |
michael@0 | 613 | } else { |
michael@0 | 614 | // XXX We need to decide how to handle \ and ~ here |
michael@0 | 615 | // we may need a if statement here for '\' and '~' |
michael@0 | 616 | // to map them to Yen and Overbar |
michael@0 | 617 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 618 | goto error1; |
michael@0 | 619 | *dest++ = (char16_t) *src; |
michael@0 | 620 | ++mRunLength; |
michael@0 | 621 | } |
michael@0 | 622 | break; |
michael@0 | 623 | |
michael@0 | 624 | case mState_JISX0201_1976Kana: |
michael@0 | 625 | if(0x1b == *src) { |
michael@0 | 626 | mLastLegalState = mState; |
michael@0 | 627 | mState = mState_ESC; |
michael@0 | 628 | } else { |
michael@0 | 629 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 630 | goto error1; |
michael@0 | 631 | if((0x21 <= *src) && (*src <= 0x5F)) { |
michael@0 | 632 | *dest++ = (0xFF61-0x0021) + *src; |
michael@0 | 633 | } else { |
michael@0 | 634 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 635 | goto error3; |
michael@0 | 636 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 637 | } |
michael@0 | 638 | ++mRunLength; |
michael@0 | 639 | } |
michael@0 | 640 | break; |
michael@0 | 641 | |
michael@0 | 642 | case mState_JISX0208_1978: |
michael@0 | 643 | if(0x1b == *src) { |
michael@0 | 644 | mLastLegalState = mState; |
michael@0 | 645 | mState = mState_ESC; |
michael@0 | 646 | } else if(*src & 0x80) { |
michael@0 | 647 | mLastLegalState = mState; |
michael@0 | 648 | mState = mState_ERROR; |
michael@0 | 649 | } else { |
michael@0 | 650 | mData = JIS0208_INDEX[*src & 0x7F]; |
michael@0 | 651 | if (0xFFFD == mData) { |
michael@0 | 652 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 653 | goto error3; |
michael@0 | 654 | mState = mState_ERROR; |
michael@0 | 655 | } else { |
michael@0 | 656 | mState = mState_JISX0208_1978_2ndbyte; |
michael@0 | 657 | } |
michael@0 | 658 | } |
michael@0 | 659 | break; |
michael@0 | 660 | |
michael@0 | 661 | case mState_GB2312_1980: |
michael@0 | 662 | if(0x1b == *src) { |
michael@0 | 663 | mLastLegalState = mState; |
michael@0 | 664 | mState = mState_ESC; |
michael@0 | 665 | } else if(*src & 0x80) { |
michael@0 | 666 | mLastLegalState = mState; |
michael@0 | 667 | mState = mState_ERROR; |
michael@0 | 668 | } else { |
michael@0 | 669 | mData = fbIdx[*src & 0x7F]; |
michael@0 | 670 | if (0xFFFD == mData) { |
michael@0 | 671 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 672 | goto error3; |
michael@0 | 673 | mState = mState_ERROR; |
michael@0 | 674 | } else { |
michael@0 | 675 | mState = mState_GB2312_1980_2ndbyte; |
michael@0 | 676 | } |
michael@0 | 677 | } |
michael@0 | 678 | break; |
michael@0 | 679 | |
michael@0 | 680 | case mState_JISX0208_1983: |
michael@0 | 681 | if(0x1b == *src) { |
michael@0 | 682 | mLastLegalState = mState; |
michael@0 | 683 | mState = mState_ESC; |
michael@0 | 684 | } else if(*src & 0x80) { |
michael@0 | 685 | mLastLegalState = mState; |
michael@0 | 686 | mState = mState_ERROR; |
michael@0 | 687 | } else { |
michael@0 | 688 | mData = JIS0208_INDEX[*src & 0x7F]; |
michael@0 | 689 | if (0xFFFD == mData) { |
michael@0 | 690 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 691 | goto error3; |
michael@0 | 692 | mState = mState_ERROR; |
michael@0 | 693 | } else { |
michael@0 | 694 | mState = mState_JISX0208_1983_2ndbyte; |
michael@0 | 695 | } |
michael@0 | 696 | } |
michael@0 | 697 | break; |
michael@0 | 698 | |
michael@0 | 699 | case mState_KSC5601_1987: |
michael@0 | 700 | if(0x1b == *src) { |
michael@0 | 701 | mLastLegalState = mState; |
michael@0 | 702 | mState = mState_ESC; |
michael@0 | 703 | } else if(*src & 0x80) { |
michael@0 | 704 | mLastLegalState = mState; |
michael@0 | 705 | mState = mState_ERROR; |
michael@0 | 706 | } else { |
michael@0 | 707 | mData = fbIdx[*src & 0x7F]; |
michael@0 | 708 | if (0xFFFD == mData) { |
michael@0 | 709 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 710 | goto error3; |
michael@0 | 711 | mState = mState_ERROR; |
michael@0 | 712 | } else { |
michael@0 | 713 | mState = mState_KSC5601_1987_2ndbyte; |
michael@0 | 714 | } |
michael@0 | 715 | } |
michael@0 | 716 | break; |
michael@0 | 717 | |
michael@0 | 718 | case mState_JISX0212_1990: |
michael@0 | 719 | if(0x1b == *src) { |
michael@0 | 720 | mLastLegalState = mState; |
michael@0 | 721 | mState = mState_ESC; |
michael@0 | 722 | } else if(*src & 0x80) { |
michael@0 | 723 | mLastLegalState = mState; |
michael@0 | 724 | mState = mState_ERROR; |
michael@0 | 725 | } else { |
michael@0 | 726 | mData = JIS0212_INDEX[*src & 0x7F]; |
michael@0 | 727 | if (0xFFFD == mData) { |
michael@0 | 728 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 729 | goto error3; |
michael@0 | 730 | mState = mState_ERROR; |
michael@0 | 731 | } else { |
michael@0 | 732 | mState = mState_JISX0212_1990_2ndbyte; |
michael@0 | 733 | } |
michael@0 | 734 | } |
michael@0 | 735 | break; |
michael@0 | 736 | |
michael@0 | 737 | case mState_JISX0208_1978_2ndbyte: |
michael@0 | 738 | { |
michael@0 | 739 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 740 | goto error1; |
michael@0 | 741 | uint8_t off = sbIdx[*src]; |
michael@0 | 742 | if(0xFF == off) { |
michael@0 | 743 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 744 | goto error3; |
michael@0 | 745 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 746 | } else { |
michael@0 | 747 | // XXX We need to map from JIS X 0208 1983 to 1987 |
michael@0 | 748 | // in the next line before pass to *dest++ |
michael@0 | 749 | *dest++ = gJapaneseMap[mData+off]; |
michael@0 | 750 | } |
michael@0 | 751 | ++mRunLength; |
michael@0 | 752 | mState = mState_JISX0208_1978; |
michael@0 | 753 | } |
michael@0 | 754 | break; |
michael@0 | 755 | |
michael@0 | 756 | case mState_GB2312_1980_2ndbyte: |
michael@0 | 757 | { |
michael@0 | 758 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 759 | goto error1; |
michael@0 | 760 | uint8_t off = sbIdx[*src]; |
michael@0 | 761 | if(0xFF == off) { |
michael@0 | 762 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 763 | goto error3; |
michael@0 | 764 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 765 | } else { |
michael@0 | 766 | if (!mGB2312Decoder) { |
michael@0 | 767 | // creating a delegate converter (GB2312) |
michael@0 | 768 | nsresult rv; |
michael@0 | 769 | nsCOMPtr<nsICharsetConverterManager> ccm = |
michael@0 | 770 | do_GetService(kCharsetConverterManagerCID, &rv); |
michael@0 | 771 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 772 | rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder); |
michael@0 | 773 | } |
michael@0 | 774 | } |
michael@0 | 775 | if (!mGB2312Decoder) {// failed creating a delegate converter |
michael@0 | 776 | goto error2; |
michael@0 | 777 | } else { |
michael@0 | 778 | unsigned char gb[2]; |
michael@0 | 779 | char16_t uni; |
michael@0 | 780 | int32_t gbLen = 2, uniLen = 1; |
michael@0 | 781 | // ((mData/94)+0x21) is the original 1st byte. |
michael@0 | 782 | // *src is the present 2nd byte. |
michael@0 | 783 | // Put 2 bytes (one character) to gb[] with GB2312 encoding. |
michael@0 | 784 | gb[0] = ((mData / 94) + 0x21) | 0x80; |
michael@0 | 785 | gb[1] = *src | 0x80; |
michael@0 | 786 | // Convert GB2312 to unicode. |
michael@0 | 787 | mGB2312Decoder->Convert((const char *)gb, &gbLen, |
michael@0 | 788 | &uni, &uniLen); |
michael@0 | 789 | *dest++ = uni; |
michael@0 | 790 | } |
michael@0 | 791 | } |
michael@0 | 792 | ++mRunLength; |
michael@0 | 793 | mState = mState_GB2312_1980; |
michael@0 | 794 | } |
michael@0 | 795 | break; |
michael@0 | 796 | |
michael@0 | 797 | case mState_JISX0208_1983_2ndbyte: |
michael@0 | 798 | { |
michael@0 | 799 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 800 | goto error1; |
michael@0 | 801 | uint8_t off = sbIdx[*src]; |
michael@0 | 802 | if(0xFF == off) { |
michael@0 | 803 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 804 | goto error3; |
michael@0 | 805 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 806 | } else { |
michael@0 | 807 | *dest++ = gJapaneseMap[mData+off]; |
michael@0 | 808 | } |
michael@0 | 809 | ++mRunLength; |
michael@0 | 810 | mState = mState_JISX0208_1983; |
michael@0 | 811 | } |
michael@0 | 812 | break; |
michael@0 | 813 | |
michael@0 | 814 | case mState_KSC5601_1987_2ndbyte: |
michael@0 | 815 | { |
michael@0 | 816 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 817 | goto error1; |
michael@0 | 818 | uint8_t off = sbIdx[*src]; |
michael@0 | 819 | if(0xFF == off) { |
michael@0 | 820 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 821 | goto error3; |
michael@0 | 822 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 823 | } else { |
michael@0 | 824 | if (!mEUCKRDecoder) { |
michael@0 | 825 | // creating a delegate converter (EUC-KR) |
michael@0 | 826 | nsresult rv; |
michael@0 | 827 | nsCOMPtr<nsICharsetConverterManager> ccm = |
michael@0 | 828 | do_GetService(kCharsetConverterManagerCID, &rv); |
michael@0 | 829 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 830 | rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder); |
michael@0 | 831 | } |
michael@0 | 832 | } |
michael@0 | 833 | if (!mEUCKRDecoder) {// failed creating a delegate converter |
michael@0 | 834 | goto error2; |
michael@0 | 835 | } else { |
michael@0 | 836 | unsigned char ksc[2]; |
michael@0 | 837 | char16_t uni; |
michael@0 | 838 | int32_t kscLen = 2, uniLen = 1; |
michael@0 | 839 | // ((mData/94)+0x21) is the original 1st byte. |
michael@0 | 840 | // *src is the present 2nd byte. |
michael@0 | 841 | // Put 2 bytes (one character) to ksc[] with EUC-KR encoding. |
michael@0 | 842 | ksc[0] = ((mData / 94) + 0x21) | 0x80; |
michael@0 | 843 | ksc[1] = *src | 0x80; |
michael@0 | 844 | // Convert EUC-KR to unicode. |
michael@0 | 845 | mEUCKRDecoder->Convert((const char *)ksc, &kscLen, |
michael@0 | 846 | &uni, &uniLen); |
michael@0 | 847 | *dest++ = uni; |
michael@0 | 848 | } |
michael@0 | 849 | } |
michael@0 | 850 | ++mRunLength; |
michael@0 | 851 | mState = mState_KSC5601_1987; |
michael@0 | 852 | } |
michael@0 | 853 | break; |
michael@0 | 854 | |
michael@0 | 855 | case mState_JISX0212_1990_2ndbyte: |
michael@0 | 856 | { |
michael@0 | 857 | uint8_t off = sbIdx[*src]; |
michael@0 | 858 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 859 | goto error1; |
michael@0 | 860 | if(0xFF == off) { |
michael@0 | 861 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 862 | goto error3; |
michael@0 | 863 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 864 | } else { |
michael@0 | 865 | *dest++ = gJapaneseMap[mData+off]; |
michael@0 | 866 | } |
michael@0 | 867 | ++mRunLength; |
michael@0 | 868 | mState = mState_JISX0212_1990; |
michael@0 | 869 | } |
michael@0 | 870 | break; |
michael@0 | 871 | |
michael@0 | 872 | case mState_ESC_2e: // ESC . |
michael@0 | 873 | // "ESC ." designates a 96-character set to G2. |
michael@0 | 874 | mState = mLastLegalState; |
michael@0 | 875 | if( 'A' == *src) { |
michael@0 | 876 | G2charset = G2_ISO88591; |
michael@0 | 877 | } else if ('F' == *src) { |
michael@0 | 878 | G2charset = G2_ISO88597; |
michael@0 | 879 | } else { |
michael@0 | 880 | if (CHECK_OVERRUN(dest, destEnd, 3)) |
michael@0 | 881 | goto error1; |
michael@0 | 882 | *dest++ = (char16_t) 0x1b; |
michael@0 | 883 | *dest++ = (char16_t) '.'; |
michael@0 | 884 | if (0x80 & *src) { |
michael@0 | 885 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 886 | goto error3; |
michael@0 | 887 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 888 | } else { |
michael@0 | 889 | *dest++ = (char16_t) *src; |
michael@0 | 890 | } |
michael@0 | 891 | } |
michael@0 | 892 | break; |
michael@0 | 893 | |
michael@0 | 894 | case mState_ESC_4e: // ESC N |
michael@0 | 895 | // "ESC N" is the SS2 sequence, which invokes the G2-designated |
michael@0 | 896 | // character set. Since SS2 is effective only for the next |
michael@0 | 897 | // character, mState is restored to the last legal state. |
michael@0 | 898 | mState = mLastLegalState; |
michael@0 | 899 | if((0x20 <= *src) && (*src <= 0x7F)) { |
michael@0 | 900 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 901 | goto error1; |
michael@0 | 902 | if (G2_ISO88591 == G2charset) { |
michael@0 | 903 | *dest++ = *src | 0x80; |
michael@0 | 904 | } else if (G2_ISO88597 == G2charset) { |
michael@0 | 905 | if (!mISO88597Decoder) { |
michael@0 | 906 | // creating a delegate converter (ISO-8859-7) |
michael@0 | 907 | nsresult rv; |
michael@0 | 908 | nsCOMPtr<nsICharsetConverterManager> ccm = |
michael@0 | 909 | do_GetService(kCharsetConverterManagerCID, &rv); |
michael@0 | 910 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 911 | rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder); |
michael@0 | 912 | } |
michael@0 | 913 | } |
michael@0 | 914 | if (!mISO88597Decoder) {// failed creating a delegate converter |
michael@0 | 915 | goto error2; |
michael@0 | 916 | } else { |
michael@0 | 917 | // Put one character with ISO-8859-7 encoding. |
michael@0 | 918 | unsigned char gr = *src | 0x80; |
michael@0 | 919 | char16_t uni; |
michael@0 | 920 | int32_t grLen = 1, uniLen = 1; |
michael@0 | 921 | // Convert ISO-8859-7 to unicode. |
michael@0 | 922 | mISO88597Decoder->Convert((const char *)&gr, &grLen, |
michael@0 | 923 | &uni, &uniLen); |
michael@0 | 924 | *dest++ = uni; |
michael@0 | 925 | } |
michael@0 | 926 | } else {// G2charset is G2_unknown (not designated yet) |
michael@0 | 927 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 928 | goto error3; |
michael@0 | 929 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 930 | } |
michael@0 | 931 | ++mRunLength; |
michael@0 | 932 | } else { |
michael@0 | 933 | if (CHECK_OVERRUN(dest, destEnd, 3)) |
michael@0 | 934 | goto error1; |
michael@0 | 935 | *dest++ = (char16_t) 0x1b; |
michael@0 | 936 | *dest++ = (char16_t) 'N'; |
michael@0 | 937 | if (0x80 & *src) { |
michael@0 | 938 | if (mErrBehavior == kOnError_Signal) |
michael@0 | 939 | goto error3; |
michael@0 | 940 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 941 | } else { |
michael@0 | 942 | *dest++ = (char16_t) *src; |
michael@0 | 943 | } |
michael@0 | 944 | } |
michael@0 | 945 | break; |
michael@0 | 946 | |
michael@0 | 947 | case mState_ERROR: |
michael@0 | 948 | mState = mLastLegalState; |
michael@0 | 949 | if (mErrBehavior == kOnError_Signal) { |
michael@0 | 950 | mRunLength = 0; |
michael@0 | 951 | goto error3; |
michael@0 | 952 | } |
michael@0 | 953 | if (CHECK_OVERRUN(dest, destEnd, 1)) |
michael@0 | 954 | goto error1; |
michael@0 | 955 | *dest++ = UNICODE_REPLACEMENT_CHARACTER; |
michael@0 | 956 | ++mRunLength; |
michael@0 | 957 | break; |
michael@0 | 958 | |
michael@0 | 959 | } // switch |
michael@0 | 960 | src++; |
michael@0 | 961 | } |
michael@0 | 962 | *aDestLen = dest - aDest; |
michael@0 | 963 | return NS_OK; |
michael@0 | 964 | error1: |
michael@0 | 965 | *aDestLen = dest - aDest; |
michael@0 | 966 | *aSrcLen = src - (const unsigned char*)aSrc; |
michael@0 | 967 | return NS_OK_UDEC_MOREOUTPUT; |
michael@0 | 968 | error2: |
michael@0 | 969 | *aDestLen = dest - aDest; |
michael@0 | 970 | *aSrcLen = src - (const unsigned char*)aSrc; |
michael@0 | 971 | return NS_ERROR_UNEXPECTED; |
michael@0 | 972 | error3: |
michael@0 | 973 | *aDestLen = dest - aDest; |
michael@0 | 974 | *aSrcLen = src - (const unsigned char*)aSrc; |
michael@0 | 975 | return NS_ERROR_ILLEGAL_INPUT; |
michael@0 | 976 | } |