intl/uconv/ucvja/nsJapaneseToUnicode.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirement documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     5 #include "nsJapaneseToUnicode.h"
     7 #include "nsUCSupport.h"
     9 #include "japanese.map"
    11 #include "nsICharsetConverterManager.h"
    12 #include "nsServiceManagerUtils.h"
    14 #include "mozilla/Assertions.h"
    16 // HTML5 says to use Windows-31J instead of the real Shift_JIS for decoding
       // Both lead-byte index tables below are elements of gCP932Index; the
       // decoders in this file index them with the lead byte masked to 7 bits.
    17 #define SJIS_INDEX gCP932Index[0]
    18 #define JIS0208_INDEX gCP932Index[1]
       // Lead-byte index table used for the JIS X 0212 paths (EUC-JP after 0x8f,
       // and the ISO-2022-JP JISX0212_1990 state).
    20 #define JIS0212_INDEX gJIS0212Index
       // Code unit substituted for a well-formed Shift_JIS pair whose table
       // entry is 0xfffd (see nsShiftJISToUnicode::Convert).
    21 #define SJIS_UNMAPPED	0x30fb
    22 #define UNICODE_REPLACEMENT_CHARACTER 0xfffd
       // True when byte b, compared as uint8_t, lies in 0xa1..0xfe inclusive.
    23 #define IN_GR_RANGE(b) \
    24   ((uint8_t(0xa1) <= uint8_t(b)) && (uint8_t(b) <= uint8_t(0xfe)))
    /*
     * Decodes Shift_JIS bytes (via the SJIS_INDEX / gJapaneseMap tables) from
     * aSrc into UTF-16 code units in aDest, one source byte per loop iteration.
     *
     * aSrc / aSrcLen   input bytes; *aSrcLen is read as the input length and is
     *                  overwritten with a byte offset only on the
     *                  NS_ERROR_ILLEGAL_INPUT and NS_OK_UDEC_MOREOUTPUT returns
     *                  (it is left untouched when NS_OK is returned).
     * aDest / aDestLen output buffer; *aDestLen is read as its capacity and is
     *                  overwritten with the number of code units written on
     *                  every return path.
     *
     * Returns NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when the
     * output buffer filled up first, and NS_ERROR_ILLEGAL_INPUT when an invalid
     * or unmapped sequence is met while mErrBehavior == kOnError_Signal.
     *
     * mState and mData are not declared in this function, so a lead byte seen
     * at the end of one call is presumably carried into the next call
     * (TODO confirm against the class declaration):
     *   mState 0: expecting a single byte or a lead byte
     *   mState 1: expecting the trail byte of a table-mapped pair
     *   mState 2: expecting the trail byte of an EUDC pair
     *
     * NOTE(review): every store through dest happens before the
     * dest >= destEnd test, so with *aDestLen == 0 the first emitted unit
     * appears to land one past the buffer - confirm callers never pass an
     * empty output buffer.
     */
    26 NS_IMETHODIMP nsShiftJISToUnicode::Convert(
    27    const char * aSrc, int32_t * aSrcLen,
    28      char16_t * aDest, int32_t * aDestLen)
    29 {
          // Trail byte -> offset within a lead byte's row: 0x40-0x7E map to
          // 0..62, 0x80-0xFC map to 63..187, every other byte is 0xFF (invalid).
    30    static const uint8_t sbIdx[256] =
    31    {
    32      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x00 */
    33      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x08 */
    34      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x10 */
    35      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x18 */
    36      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x20 */
    37      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x28 */
    38      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x30 */
    39      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  /* 0x38 */
    40         0,    1,    2,    3,    4,    5,    6,    7,  /* 0x40 */
    41         8,    9,   10,   11,   12,   13,   14,   15,  /* 0x48 */
    42        16,   17,   18,   19,   20,   21,   22,   23,  /* 0x50 */
    43        24,   25,   26,   27,   28,   29,   30,   31,  /* 0x58 */
    44        32,   33,   34,   35,   36,   37,   38,   39,  /* 0x60 */
    45        40,   41,   42,   43,   44,   45,   46,   47,  /* 0x68 */
    46        48,   49,   50,   51,   52,   53,   54,   55,  /* 0x70 */
    47        56,   57,   58,   59,   60,   61,   62, 0xFF,  /* 0x78 */
    48        63,   64,   65,   66,   67,   68,   69,   70,  /* 0x80 */
    49        71,   72,   73,   74,   75,   76,   77,   78,  /* 0x88 */
    50        79,   80,   81,   82,   83,   84,   85,   86,  /* 0x90 */
    51        87,   88,   89,   90,   91,   92,   93,   94,  /* 0x98 */
    52        95,   96,   97,   98,   99,  100,  101,  102,  /* 0xa0 */
    53       103,  104,  105,  106,  107,  108,  109,  110,  /* 0xa8 */
    54       111,  112,  113,  114,  115,  116,  117,  118,  /* 0xb0 */
    55       119,  120,  121,  122,  123,  124,  125,  126,  /* 0xb8 */
    56       127,  128,  129,  130,  131,  132,  133,  134,  /* 0xc0 */
    57       135,  136,  137,  138,  139,  140,  141,  142,  /* 0xc8 */
    58       143,  144,  145,  146,  147,  148,  149,  150,  /* 0xd0 */
    59       151,  152,  153,  154,  155,  156,  157,  158,  /* 0xd8 */
    60       159,  160,  161,  162,  163,  164,  165,  166,  /* 0xe0 */
    61       167,  168,  169,  170,  171,  172,  173,  174,  /* 0xe8 */
    62       175,  176,  177,  178,  179,  180,  181,  182,  /* 0xf0 */
    63       183,  184,  185,  186,  187, 0xFF, 0xFF, 0xFF,  /* 0xf8 */
    64    };
    66    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
    67    const unsigned char* src =(unsigned char*) aSrc;
    68    char16_t* destEnd = aDest + *aDestLen;
    69    char16_t* dest = aDest;
    70    while (src < srcEnd) {
    71        switch (mState) {
    72           case 0:
    73           if (*src <= 0x80) {
    74             // ASCII
                   // (the <= test also passes byte 0x80 through unchanged)
    75             *dest++ = (char16_t) *src;
    76             if (dest >= destEnd) {
    77               goto error1;
    78             }
    79           } else {
                   // The table value decides how this byte is treated: a row
                   // base for gJapaneseMap, an EUDC base, or a final code unit.
    80             mData = SJIS_INDEX[*src & 0x7F];
    81             if (mData < 0xE000) {
    82               mState = 1; // two bytes
    83             } else if (mData < 0xF000) {
    84               mState = 2; // EUDC
    85             } else {
    86               *dest++ = mData; // JIS 0201
    87               if (dest >= destEnd) {
    88                 goto error1;
    89               }
    90             }
    91           }
    92           break;
    94           case 1: // Index to table
    95           {
    96             MOZ_ASSERT(mData < 0xE000);
    97             uint8_t off = sbIdx[*src];
    99             // Error handling: in the case where the second octet is not in the
   100             // valid ranges 0x40-0x7E 0x80-0xFC, unconsume the invalid octet and
   101             // interpret it as the ASCII value. In the case where the second
   102             // octet is in the valid range but there is no mapping for the
   103             // 2-octet sequence, do not unconsume.
   104             if(0xFF == off) {
                      // Step back so the src++ at the bottom of the loop lands on
                      // this byte again, now with mState == 0.
                      // NOTE(review): if this trail byte is the first byte of the
                      // buffer, src now points before aSrc and the signalled
                      // *aSrcLen below would be -1 - confirm callers cope.
   105                src--;
   106                if (mErrBehavior == kOnError_Signal)
   107                  goto error_invalidchar;
   108                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   109             } else {
   110                char16_t ch = gJapaneseMap[mData+off];
   111                if(ch == 0xfffd) {
   112                  if (mErrBehavior == kOnError_Signal)
   113                    goto error_invalidchar;
   114                  ch = SJIS_UNMAPPED;
   115                }
   116                *dest++ = ch;
   117             }
   118             mState = 0;
   119             if(dest >= destEnd)
   120               goto error1;
   121           }
   122           break;
   124           case 2: // EUDC
   125           {
   126             MOZ_ASSERT(0xE000 <= mData && mData < 0xF000);
   127             uint8_t off = sbIdx[*src];
   129             // Error handling as in case 1
   130             if(0xFF == off) {
   131                src--;
   132                if (mErrBehavior == kOnError_Signal)
   133                  goto error_invalidchar;
   135                *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   136             } else {
                      // The index value is itself the base code unit for this
                      // lead byte; add the trail offset to get the result.
   137                *dest++ = mData + off;
   138             }
   139             mState = 0;
   140             if(dest >= destEnd)
   141               goto error1;
   142           }
   143           break;
   145        }
   146        src++;
   147    }
   148    *aDestLen = dest - aDest;
   149    return NS_OK;
   150 error_invalidchar:
          // Report what was written and the offset src currently points at.
   151    *aDestLen = dest - aDest;
   152    *aSrcLen = src - (const unsigned char*)aSrc;
   153    return NS_ERROR_ILLEGAL_INPUT;
   154 error1:
          // Output buffer is full. The goto skipped the loop's src++, so count
          // the byte that was just handled before deciding what to report.
   155    *aDestLen = dest - aDest;
   156    src++;
          // Buffer filled exactly as the input ran out with no pending lead
          // byte: that is a complete conversion, not a "more output" case.
   157    if ((mState == 0) && (src == srcEnd)) {
   158      return NS_OK;
   159    }
   160    *aSrcLen = src - (const unsigned char*)aSrc;
   161    return NS_OK_UDEC_MOREOUTPUT;
   162 }
   /* Returns SJIS_UNMAPPED (0x30fb) as a char16_t: the same code unit that
    * Convert() substitutes when a Shift_JIS pair's table entry is 0xfffd. */
   164 char16_t
   165 nsShiftJISToUnicode::GetCharacterForUnMapped()
   166 {
   167   return char16_t(SJIS_UNMAPPED);
   168 }
   /*
    * Decodes EUC-JP bytes from aSrc into UTF-16 code units in aDest, one
    * source byte per loop iteration.
    *
    * aSrc / aSrcLen   input bytes; *aSrcLen is read as the input length and is
    *                  overwritten with a byte offset only on the
    *                  NS_ERROR_ILLEGAL_INPUT and NS_OK_UDEC_MOREOUTPUT returns.
    * aDest / aDestLen output buffer; *aDestLen is read as its capacity and is
    *                  overwritten with the number of code units written on
    *                  every return path.
    *
    * Returns NS_OK when all input was consumed, NS_OK_UDEC_MOREOUTPUT when the
    * output buffer filled up first, and NS_ERROR_ILLEGAL_INPUT when an invalid
    * sequence is met while mErrBehavior == kOnError_Signal.
    *
    * mState values used below (mState/mData are not declared here; they are
    * presumably members that persist between calls - TODO confirm):
    *   0: between characters
    *   1: second byte of a two-byte JIS 0208 character
    *   2: byte following 0x8e (JIS 0201)
    *   3: first byte following 0x8f (JIS 0212)
    *   4: second byte following 0x8f, first byte had an index entry
    *   5: second byte following 0x8f, first byte had no index entry
    *
    * NOTE(review): as every store through dest precedes the dest >= destEnd
    * test, a call with *aDestLen == 0 appears to write one unit past the
    * buffer - confirm callers never pass an empty output buffer.
    */
   170 NS_IMETHODIMP nsEUCJPToUnicodeV2::Convert(
   171    const char * aSrc, int32_t * aSrcLen,
   172      char16_t * aDest, int32_t * aDestLen)
   173 {
          // Second byte -> offset within a row: 0xA1-0xFE map to 0..93, every
          // other byte is 0xFF (invalid).
   174    static const uint8_t sbIdx[256] =
   175    {
   176 /* 0x0X */
   177      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 
   178      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   179 /* 0x1X */
   180      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   181      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   182 /* 0x2X */
   183      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   184      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   185 /* 0x3X */
   186      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   187      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   188 /* 0x4X */
   189      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   190      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   191 /* 0x5X */
   192      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   193      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   194 /* 0x6X */
   195      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   196      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   197 /* 0x7X */
   198      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   199      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   200 /* 0x8X */
   201      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   202      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   203 /* 0x9X */
   204      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   205      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   206 /* 0xAX */
   207      0xFF, 0,    1,    2,    3,    4,    5,    6,  
   208      7,    8 ,   9,    10,   11,   12,   13,   14,
   209 /* 0xBX */
   210      15,   16,   17,   18,   19,   20,   21,   22, 
   211      23,   24,   25,   26,   27,   28,   29,   30, 
   212 /* 0xCX */
   213      31,   32,   33,   34,   35,   36,   37,   38, 
   214      39,   40,   41,   42,   43,   44,   45,   46, 
   215 /* 0xDX */
   216      47,   48,   49,   50,   51,   52,   53,   54, 
   217      55,   56,   57,   58,   59,   60,   61,   62, 
   218 /* 0xEX */
   219      63,   64,   65,   66,   67,   68,   69,   70, 
   220      71,   72,   73,   74,   75,   76,   77,   78, 
   221 /* 0xFX */
   222      79,   80,   81,   82,   83,   84,   85,   86, 
   223      87,   88,   89,   90,   91,   92,   93,   0xFF, 
   224    };
   226    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
   227    const unsigned char* src =(unsigned char*) aSrc;
   228    char16_t* destEnd = aDest + *aDestLen;
   229    char16_t* dest = aDest;
   230    while((src < srcEnd))
   231    {
   232        switch(mState)
   233        {
   234           case 0:
                 // High bit set and not 0xa0: candidate lead byte. Byte 0xa0
                 // is excluded here and so takes the pass-through branch below.
   235           if(*src & 0x80  && *src != (unsigned char)0xa0)
   236           {
   237             mData = JIS0208_INDEX[*src & 0x7F];
   238             if(mData != 0xFFFD )
   239             {
   240                mState = 1; // two byte JIS0208
   241             } else {
                      // No JIS0208 row for this byte: it is either one of the
                      // two prefix bytes or an invalid lead byte.
   242                if( 0x8e == *src) {
   243                  // JIS 0201
   244                  mState = 2; // JIS0201
   245                } else if(0x8f == *src) {
   246                  // JIS 0212
   247                  mState = 3; // JIS0212
   248                } else {
   249                  // others 
   250                  if (mErrBehavior == kOnError_Signal)
   251                    goto error_invalidchar;
   252                  *dest++ = 0xFFFD;
   253                  if(dest >= destEnd)
   254                    goto error1;
   255                }
   256             }
   257           } else {
   258             // ASCII
   259             *dest++ = (char16_t) *src;
   260             if(dest >= destEnd)
   261               goto error1;
   262           }
   263           break;
   265           case 1: // Index to table
   266           {
   267             uint8_t off = sbIdx[*src];
   268             if(0xFF == off) {
   269               if (mErrBehavior == kOnError_Signal)
   270                 goto error_invalidchar;
   271               *dest++ = 0xFFFD;
   272                // if the first byte is valid for EUC-JP but the second 
   273                // is not while being a valid US-ASCII, save it
   274                // instead of eating it up !
                      // (--src cancels the src++ at the bottom of the loop, so
                      // the byte is decoded again with mState == 0)
   275               if ( (uint8_t)*src < (uint8_t)0x7f )
   276                 --src;
   277             } else {
   278                *dest++ = gJapaneseMap[mData+off];
   279             }
   280             mState = 0;
   281             if(dest >= destEnd)
   282               goto error1;
   283           }
   284           break;
   286           case 2: // JIS 0201
   287           {
   288             if((0xA1 <= *src) && (*src <= 0xDF)) {
                     // Linear shift: 0xA1 -> 0xFF61 ... 0xDF -> 0xFF9F.
   289               *dest++ = (0xFF61-0x00A1) + *src;
   290             } else {
   291               if (mErrBehavior == kOnError_Signal)
   292                 goto error_invalidchar;
   293               *dest++ = 0xFFFD;             
   294               // if 0x8e is not followed by a valid JIS X 0201 byte
   295               // but by a valid US-ASCII, save it instead of eating it up.
   296               if ( (uint8_t)*src < (uint8_t)0x7f )
   297                 --src;
   298             }
   299             mState = 0;
   300             if(dest >= destEnd)
   301               goto error1;
   302           }
   303           break;
   305           case 3: // JIS 0212
   306           {
   307             if (IN_GR_RANGE(*src))
   308             {
   309               mData = JIS0212_INDEX[*src & 0x7F];
   310               if(mData != 0xFFFD )
   311               {
   312                  mState = 4; 
   313               } else {
   314                  mState = 5; // error
   315               }
   316             } else {
   317               // First "JIS 0212" byte is not in the valid GR range: save it
   318               if (mErrBehavior == kOnError_Signal)
   319                 goto error_invalidchar;
   320               *dest++ = 0xFFFD;
                     // Unconditional here: the byte is always reprocessed in
                     // state 0, whatever its value.
   321               --src;
   322               mState = 0;
   323               if(dest >= destEnd)
   324                 goto error1;
   325             }
   326           }
   327           break;
   328           case 4:
   329           {
   330             uint8_t off = sbIdx[*src];
   331             if(0xFF != off) {
   332               *dest++ = gJapaneseMap[mData+off];
   333               mState = 0;
   334               if(dest >= destEnd)
   335                 goto error1;
   336               break;
   337             }
   338             // else fall through to error handler
   339           }
   340           case 5: // two bytes undefined
   341           {
   342             if (mErrBehavior == kOnError_Signal)
   343               goto error_invalidchar;
   344             *dest++ = 0xFFFD;
   345             // Undefined JIS 0212 two byte sequence. If the second byte is in
   346             // the valid range for a two byte sequence (0xa1 - 0xfe) consume
   347             // both bytes. Otherwise resynchronize on the second byte.
   348             if (!IN_GR_RANGE(*src))
   349               --src;
   350             mState = 0;
   351             if(dest >= destEnd)
   352               goto error1;
   353           }
   354           break;
   355        }
   356        src++;
   357    }
   358    *aDestLen = dest - aDest;
   359    return NS_OK;
   360 error_invalidchar:
          // Report what was written and the offset src currently points at.
   361    *aDestLen = dest - aDest;
   362    *aSrcLen = src - (const unsigned char*)aSrc;
   363    return NS_ERROR_ILLEGAL_INPUT;
   364 error1:
          // Output buffer is full. The goto skipped the loop's src++, so count
          // the byte that was just handled before deciding what to report.
   365    *aDestLen = dest - aDest;
   366    src++;
          // Buffer filled exactly as the input ran out with no partial
          // character pending: treat as a complete conversion.
   367    if ((mState == 0) && (src == srcEnd)) {
   368      return NS_OK;
   369    } 
   370    *aSrcLen = src - (const unsigned char*)aSrc;
   371    return NS_OK_UDEC_MOREOUTPUT;
   372 }
   376 NS_IMETHODIMP nsISO2022JPToUnicodeV2::Convert(
   377    const char * aSrc, int32_t * aSrcLen,
   378      char16_t * aDest, int32_t * aDestLen)
   379 {
   380    static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
   382    static const uint16_t fbIdx[128] =
   383    {
   384 /* 0x8X */
   385      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   386      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   387 /* 0x9X */
   388      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   389      0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
   390 /* 0xAX */
   391      0xFFFD, 0,      94,     94* 2,  94* 3,  94* 4,  94* 5,  94* 6,  
   392      94* 7,  94* 8 , 94* 9,  94*10,  94*11,  94*12,  94*13,  94*14,
   393 /* 0xBX */
   394      94*15,  94*16,  94*17,  94*18,  94*19,  94*20,  94*21,  94*22,
   395      94*23,  94*24,  94*25,  94*26,  94*27,  94*28,  94*29,  94*30,
   396 /* 0xCX */
   397      94*31,  94*32,  94*33,  94*34,  94*35,  94*36,  94*37,  94*38,
   398      94*39,  94*40,  94*41,  94*42,  94*43,  94*44,  94*45,  94*46,
   399 /* 0xDX */
   400      94*47,  94*48,  94*49,  94*50,  94*51,  94*52,  94*53,  94*54,
   401      94*55,  94*56,  94*57,  94*58,  94*59,  94*60,  94*61,  94*62,
   402 /* 0xEX */
   403      94*63,  94*64,  94*65,  94*66,  94*67,  94*68,  94*69,  94*70,
   404      94*71,  94*72,  94*73,  94*74,  94*75,  94*76,  94*77,  94*78,
   405 /* 0xFX */
   406      94*79,  94*80,  94*81,  94*82,  94*83,  94*84,  94*85,  94*86,
   407      94*87,  94*88,  94*89,  94*90,  94*91,  94*92,  94*93,  0xFFFD,
   408    };
   409    static const uint8_t sbIdx[256] =
   410    {
   411 /* 0x0X */
   412      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   413      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   414 /* 0x1X */
   415      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   416      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   417 /* 0x2X */
   418      0xFF, 0,    1,    2,    3,    4,    5,    6,  
   419      7,    8 ,   9,    10,   11,   12,   13,   14,
   420 /* 0x3X */
   421      15,   16,   17,   18,   19,   20,   21,   22, 
   422      23,   24,   25,   26,   27,   28,   29,   30, 
   423 /* 0x4X */
   424      31,   32,   33,   34,   35,   36,   37,   38, 
   425      39,   40,   41,   42,   43,   44,   45,   46, 
   426 /* 0x5X */
   427      47,   48,   49,   50,   51,   52,   53,   54, 
   428      55,   56,   57,   58,   59,   60,   61,   62, 
   429 /* 0x6X */
   430      63,   64,   65,   66,   67,   68,   69,   70, 
   431      71,   72,   73,   74,   75,   76,   77,   78, 
   432 /* 0x7X */
   433      79,   80,   81,   82,   83,   84,   85,   86, 
   434      87,   88,   89,   90,   91,   92,   93,   0xFF, 
   435 /* 0x8X */
   436      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   437      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   438 /* 0x9X */
   439      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   440      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   441 /* 0xAX */
   442      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   443      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   444 /* 0xBX */
   445      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   446      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   447 /* 0xCX */
   448      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   449      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   450 /* 0xDX */
   451      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   452      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   453 /* 0xEX */
   454      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   455      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   456 /* 0xFX */
   457      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   458      0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
   459    };
   461    const unsigned char* srcEnd = (unsigned char*)aSrc + *aSrcLen;
   462    const unsigned char* src =(unsigned char*) aSrc;
   463    char16_t* destEnd = aDest + *aDestLen;
   464    char16_t* dest = aDest;
   465    while((src < srcEnd))
   466    {
   468        switch(mState)
   469        {
   470           case mState_ASCII:
   471             if(0x1b == *src)
   472             {
   473               mLastLegalState = mState;
   474               mState = mState_ESC;
   475             } else if(*src & 0x80) {
   476               if (mErrBehavior == kOnError_Signal)
   477                 goto error3;
   478               if (CHECK_OVERRUN(dest, destEnd, 1))
   479                 goto error1;
   480               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   481             } else {
   482               if (CHECK_OVERRUN(dest, destEnd, 1))
   483                 goto error1;
   484               *dest++ = (char16_t) *src;
   485             }
   486           break;
   488           case mState_ESC:
   489             if( '(' == *src) {
   490               mState = mState_ESC_28;
   491             } else if ('$' == *src)  {
   492               mState = mState_ESC_24;
   493             } else if ('.' == *src)  { // for ISO-2022-JP-2
   494               mState = mState_ESC_2e;
   495             } else if ('N' == *src)  { // for ISO-2022-JP-2
   496               mState = mState_ESC_4e;
   497             } else  {
   498               if (CHECK_OVERRUN(dest, destEnd, 2))
   499                 goto error1;
   500               *dest++ = (char16_t) 0x1b;
   501               if (0x80 & *src) {
   502                 if (mErrBehavior == kOnError_Signal)
   503                   goto error3;
   504                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   505               } else {
   506                 *dest++ = (char16_t) *src;
   507               }
   508               mState = mLastLegalState;
   509             }
   510           break;
   512           case mState_ESC_28: // ESC (
   513             if( 'B' == *src) {
   514               mState = mState_ASCII;
   515               if (mRunLength == 0) {
   516                 if (CHECK_OVERRUN(dest, destEnd, 1))
   517                   goto error1;
   518                 *dest++ = 0xFFFD;
   519               }
   520               mRunLength = 0;
   521             } else if ('J' == *src)  {
   522               mState = mState_JISX0201_1976Roman;
   523               if (mRunLength == 0 && mLastLegalState != mState_ASCII) {
   524                 if (CHECK_OVERRUN(dest, destEnd, 1))
   525                   goto error1;
   526                 if (mErrBehavior == kOnError_Signal)
   527                   goto error3;
   528                 *dest++ = 0xFFFD;
   529               }
   530               mRunLength = 0;
   531             } else if ('I' == *src)  {
   532               mState = mState_JISX0201_1976Kana;
   533               mRunLength = 0;
   534             } else  {
   535               if (CHECK_OVERRUN(dest, destEnd, 3))
   536                 goto error1;
   537               *dest++ = (char16_t) 0x1b;
   538               *dest++ = (char16_t) '(';
   539               if (0x80 & *src) {
   540                 if (mErrBehavior == kOnError_Signal)
   541                   goto error3;
   542                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   543               } else {
   544                 *dest++ = (char16_t) *src;
   545               }
   546               mState = mLastLegalState;
   547             }
   548           break;
   550           case mState_ESC_24: // ESC $
   551             if( '@' == *src) {
   552               mState = mState_JISX0208_1978;
   553               mRunLength = 0;
   554             } else if ('A' == *src)  {
   555               mState = mState_GB2312_1980;
   556               mRunLength = 0;
   557             } else if ('B' == *src)  {
   558               mState = mState_JISX0208_1983;
   559               mRunLength = 0;
   560             } else if ('(' == *src)  {
   561               mState = mState_ESC_24_28;
   562             } else  {
   563               if (CHECK_OVERRUN(dest, destEnd, 3))
   564                 goto error1;
   565               *dest++ = (char16_t) 0x1b;
   566               *dest++ = (char16_t) '$';
   567               if (0x80 & *src) {
   568                 if (mErrBehavior == kOnError_Signal)
   569                   goto error3;
   570                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   571               } else {
   572                 *dest++ = (char16_t) *src;
   573               }
   574               mState = mLastLegalState;
   575             }
   576           break;
   578           case mState_ESC_24_28: // ESC $ (
   579             if( 'C' == *src) {
   580               mState = mState_KSC5601_1987;
   581               mRunLength = 0;
   582             } else if ('D' == *src) {
   583               mState = mState_JISX0212_1990;
   584               mRunLength = 0;
   585             } else  {
   586               if (CHECK_OVERRUN(dest, destEnd, 4))
   587                 goto error1;
   588               *dest++ = (char16_t) 0x1b;
   589               *dest++ = (char16_t) '$';
   590               *dest++ = (char16_t) '(';
   591               if (0x80 & *src) {
   592                 if (mErrBehavior == kOnError_Signal)
   593                   goto error3;
   594                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   595               } else {
   596                 *dest++ = (char16_t) *src;
   597               }
   598               mState = mLastLegalState;
   599             }
   600           break;
   602           case mState_JISX0201_1976Roman:
   603             if(0x1b == *src) {
   604               mLastLegalState = mState;
   605               mState = mState_ESC;
   606             } else if(*src & 0x80) {
   607               if (mErrBehavior == kOnError_Signal)
   608                 goto error3;
   609               if (CHECK_OVERRUN(dest, destEnd, 1))
   610                 goto error1;
   611               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   612               ++mRunLength;
   613             } else {
   614               // XXX We need to  decide how to handle \ and ~ here
   615               // we may need a if statement here for '\' and '~' 
   616               // to map them to Yen and Overbar
   617               if (CHECK_OVERRUN(dest, destEnd, 1))
   618                 goto error1;
   619               *dest++ = (char16_t) *src;
   620               ++mRunLength;
   621             }
   622           break;
   624           case mState_JISX0201_1976Kana:
   625             if(0x1b == *src) {
   626               mLastLegalState = mState;
   627               mState = mState_ESC;
   628             } else {
   629               if (CHECK_OVERRUN(dest, destEnd, 1))
   630                 goto error1;
   631               if((0x21 <= *src) && (*src <= 0x5F)) {
   632                 *dest++ = (0xFF61-0x0021) + *src;
   633               } else {
   634                 if (mErrBehavior == kOnError_Signal)
   635                   goto error3;
   636                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   637               }
   638               ++mRunLength;
   639             }
   640           break;
   642           case mState_JISX0208_1978:
   643             if(0x1b == *src) {
   644               mLastLegalState = mState;
   645               mState = mState_ESC;
   646             } else if(*src & 0x80) {
   647               mLastLegalState = mState;
   648               mState = mState_ERROR;
   649             } else {
   650               mData = JIS0208_INDEX[*src & 0x7F];
   651               if (0xFFFD == mData) {
   652                 if (mErrBehavior == kOnError_Signal)
   653                   goto error3;
   654                 mState = mState_ERROR;
   655               } else {
   656                 mState = mState_JISX0208_1978_2ndbyte;
   657               }
   658             }
   659           break;
   661           case mState_GB2312_1980:
   662             if(0x1b == *src) {
   663               mLastLegalState = mState;
   664               mState = mState_ESC;
   665             } else if(*src & 0x80) {
   666               mLastLegalState = mState;
   667               mState = mState_ERROR;
   668             } else {
   669               mData = fbIdx[*src & 0x7F];
   670               if (0xFFFD == mData) {
   671                 if (mErrBehavior == kOnError_Signal)
   672                   goto error3;
   673                 mState = mState_ERROR;
   674               } else {
   675                 mState = mState_GB2312_1980_2ndbyte;
   676               }
   677             }
   678           break;
   680           case mState_JISX0208_1983:
   681             if(0x1b == *src) {
   682               mLastLegalState = mState;
   683               mState = mState_ESC;
   684             } else if(*src & 0x80) {
   685               mLastLegalState = mState;
   686               mState = mState_ERROR;
   687             } else {
   688               mData = JIS0208_INDEX[*src & 0x7F];
   689               if (0xFFFD == mData) {
   690                 if (mErrBehavior == kOnError_Signal)
   691                   goto error3;
   692                 mState = mState_ERROR;
   693               } else {
   694                 mState = mState_JISX0208_1983_2ndbyte;
   695               }
   696             }
   697           break;
   699           case mState_KSC5601_1987:
   700             if(0x1b == *src) {
   701               mLastLegalState = mState;
   702               mState = mState_ESC;
   703             } else if(*src & 0x80) {
   704               mLastLegalState = mState;
   705               mState = mState_ERROR;
   706             } else {
   707               mData = fbIdx[*src & 0x7F];
   708               if (0xFFFD == mData) {
   709                 if (mErrBehavior == kOnError_Signal)
   710                   goto error3;
   711                 mState = mState_ERROR;
   712               } else {
   713                 mState = mState_KSC5601_1987_2ndbyte;
   714               }
   715             }
   716           break;
   718           case mState_JISX0212_1990:
   719             if(0x1b == *src) {
   720               mLastLegalState = mState;
   721               mState = mState_ESC;
   722             } else if(*src & 0x80) {
   723               mLastLegalState = mState;
   724               mState = mState_ERROR;
   725             } else {
   726               mData = JIS0212_INDEX[*src & 0x7F];
   727               if (0xFFFD == mData) {
   728                 if (mErrBehavior == kOnError_Signal)
   729                   goto error3;
   730                 mState = mState_ERROR;
   731               } else {
   732                 mState = mState_JISX0212_1990_2ndbyte;
   733               }
   734             }
   735           break;
   737           case mState_JISX0208_1978_2ndbyte:
   738           {
   739             if (CHECK_OVERRUN(dest, destEnd, 1))
   740               goto error1;
   741             uint8_t off = sbIdx[*src];
   742             if(0xFF == off) {
   743               if (mErrBehavior == kOnError_Signal)
   744                 goto error3;
   745               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   746             } else {
   747                // XXX We need to map from JIS X 0208 1983 to 1987 
   748                // in the next line before pass to *dest++
   749               *dest++ = gJapaneseMap[mData+off];
   750             }
   751             ++mRunLength;
   752             mState = mState_JISX0208_1978;
   753           }
   754           break;
   756           case mState_GB2312_1980_2ndbyte:
   757           {
   758             if (CHECK_OVERRUN(dest, destEnd, 1))
   759               goto error1;
   760             uint8_t off = sbIdx[*src];
   761             if(0xFF == off) {
   762               if (mErrBehavior == kOnError_Signal)
   763                 goto error3;
   764               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   765             } else {
   766               if (!mGB2312Decoder) {
   767                 // creating a delegate converter (GB2312)
   768                 nsresult rv;
   769                 nsCOMPtr<nsICharsetConverterManager> ccm = 
   770                          do_GetService(kCharsetConverterManagerCID, &rv);
   771                 if (NS_SUCCEEDED(rv)) {
   772                   rv = ccm->GetUnicodeDecoderRaw("GB2312", &mGB2312Decoder);
   773                 }
   774               }
   775               if (!mGB2312Decoder) {// failed creating a delegate converter
   776                 goto error2;
   777               } else {
   778                 unsigned char gb[2];
   779                 char16_t uni;
   780                 int32_t gbLen = 2, uniLen = 1;
   781                 // ((mData/94)+0x21) is the original 1st byte.
   782                 // *src is the present 2nd byte.
   783                 // Put 2 bytes (one character) to gb[] with GB2312 encoding.
   784                 gb[0] = ((mData / 94) + 0x21) | 0x80;
   785                 gb[1] = *src | 0x80;
   786                 // Convert GB2312 to unicode.
   787                 mGB2312Decoder->Convert((const char *)gb, &gbLen,
   788                                         &uni, &uniLen);
   789                 *dest++ = uni;
   790               }
   791             }
   792             ++mRunLength;
   793             mState = mState_GB2312_1980;
   794           }
   795           break;
   797           case mState_JISX0208_1983_2ndbyte:
   798           {
   799             if (CHECK_OVERRUN(dest, destEnd, 1))
   800               goto error1;
   801             uint8_t off = sbIdx[*src];
   802             if(0xFF == off) {
   803               if (mErrBehavior == kOnError_Signal)
   804                 goto error3;
   805               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   806             } else {
   807               *dest++ = gJapaneseMap[mData+off];
   808             }
   809             ++mRunLength;
   810             mState = mState_JISX0208_1983;
   811           }
   812           break;
   814           case mState_KSC5601_1987_2ndbyte:
   815           {
   816             if (CHECK_OVERRUN(dest, destEnd, 1))
   817               goto error1;
   818             uint8_t off = sbIdx[*src];
   819             if(0xFF == off) {
   820               if (mErrBehavior == kOnError_Signal)
   821                 goto error3;
   822               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   823             } else {
   824               if (!mEUCKRDecoder) {
   825                 // creating a delegate converter (EUC-KR)
   826                 nsresult rv;
   827                 nsCOMPtr<nsICharsetConverterManager> ccm = 
   828                          do_GetService(kCharsetConverterManagerCID, &rv);
   829                 if (NS_SUCCEEDED(rv)) {
   830                   rv = ccm->GetUnicodeDecoderRaw("EUC-KR", &mEUCKRDecoder);
   831                 }
   832               }
   833               if (!mEUCKRDecoder) {// failed creating a delegate converter
   834                 goto error2;
   835               } else {              
   836                 unsigned char ksc[2];
   837                 char16_t uni;
   838                 int32_t kscLen = 2, uniLen = 1;
   839                 // ((mData/94)+0x21) is the original 1st byte.
   840                 // *src is the present 2nd byte.
   841                 // Put 2 bytes (one character) to ksc[] with EUC-KR encoding.
   842                 ksc[0] = ((mData / 94) + 0x21) | 0x80;
   843                 ksc[1] = *src | 0x80;
   844                 // Convert EUC-KR to unicode.
   845                 mEUCKRDecoder->Convert((const char *)ksc, &kscLen,
   846                                        &uni, &uniLen);
   847                 *dest++ = uni;
   848               }
   849             }
   850             ++mRunLength;
   851             mState = mState_KSC5601_1987;
   852           }
   853           break;
   855           case mState_JISX0212_1990_2ndbyte:
   856           {
   857             uint8_t off = sbIdx[*src];
   858             if (CHECK_OVERRUN(dest, destEnd, 1))
   859               goto error1;
   860             if(0xFF == off) {
   861               if (mErrBehavior == kOnError_Signal)
   862                 goto error3;
   863               *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   864             } else {
   865               *dest++ = gJapaneseMap[mData+off];
   866             }
   867             ++mRunLength;
   868             mState = mState_JISX0212_1990;
   869           }
   870           break;
   872           case mState_ESC_2e: // ESC .
   873             // "ESC ." will designate 96 character set to G2.
   874             mState = mLastLegalState;
   875             if( 'A' == *src) {
   876               G2charset = G2_ISO88591;
   877             } else if ('F' == *src) {
   878               G2charset = G2_ISO88597;
   879             } else  {
   880               if (CHECK_OVERRUN(dest, destEnd, 3))
   881                 goto error1;
   882               *dest++ = (char16_t) 0x1b;
   883               *dest++ = (char16_t) '.';
   884               if (0x80 & *src) {
   885                 if (mErrBehavior == kOnError_Signal)
   886                   goto error3;
   887                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   888               } else {
   889                 *dest++ = (char16_t) *src;
   890               }
   891             }
   892           break;
   894           case mState_ESC_4e: // ESC N
   895             // "ESC N" is the SS2 sequence, that invoke a G2 designated
   896             // character set.  Since SS2 is effective only for next one
   897             // character, mState should be returned to the last status.
   898             mState = mLastLegalState;
   899             if((0x20 <= *src) && (*src <= 0x7F)) {
   900               if (CHECK_OVERRUN(dest, destEnd, 1))
   901                 goto error1;
   902               if (G2_ISO88591 == G2charset) {
   903                 *dest++ = *src | 0x80;
   904               } else if (G2_ISO88597 == G2charset) {
   905                 if (!mISO88597Decoder) {
   906                   // creating a delegate converter (ISO-8859-7)
   907                   nsresult rv;
   908                   nsCOMPtr<nsICharsetConverterManager> ccm = 
   909                            do_GetService(kCharsetConverterManagerCID, &rv);
   910                   if (NS_SUCCEEDED(rv)) {
   911                     rv = ccm->GetUnicodeDecoderRaw("ISO-8859-7", &mISO88597Decoder);
   912                   }
   913                 }
   914                 if (!mISO88597Decoder) {// failed creating a delegate converter
   915                   goto error2;
   916                 } else {
   917                   // Put one character with ISO-8859-7 encoding.
   918                   unsigned char gr = *src | 0x80;
   919                   char16_t uni;
   920                   int32_t grLen = 1, uniLen = 1;
   921                   // Convert ISO-8859-7 to unicode.
   922                   mISO88597Decoder->Convert((const char *)&gr, &grLen,
   923                                             &uni, &uniLen);
   924                   *dest++ = uni;
   925                 }
   926               } else {// G2charset is G2_unknown (not designated yet)
   927                 if (mErrBehavior == kOnError_Signal)
   928                   goto error3;
   929                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   930               }
   931               ++mRunLength;
   932             } else {
   933               if (CHECK_OVERRUN(dest, destEnd, 3))
   934                 goto error1;
   935               *dest++ = (char16_t) 0x1b;
   936               *dest++ = (char16_t) 'N';
   937               if (0x80 & *src) {
   938                 if (mErrBehavior == kOnError_Signal)
   939                   goto error3;
   940                 *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   941               } else {
   942                 *dest++ = (char16_t) *src;
   943               }
   944             }
   945           break;
   947           case mState_ERROR:
   948             mState = mLastLegalState;
   949             if (mErrBehavior == kOnError_Signal) {
   950               mRunLength = 0;
   951               goto error3;
   952             }
   953             if (CHECK_OVERRUN(dest, destEnd, 1))
   954               goto error1;
   955             *dest++ = UNICODE_REPLACEMENT_CHARACTER;
   956             ++mRunLength;
   957           break;
   959        } // switch
   960        src++;
   961    }
   962    *aDestLen = dest - aDest;
   963    return NS_OK;
   964 error1:
   965    *aDestLen = dest - aDest;
   966    *aSrcLen = src - (const unsigned char*)aSrc;
   967    return NS_OK_UDEC_MOREOUTPUT;
   968 error2:
   969    *aDestLen = dest - aDest;
   970    *aSrcLen = src - (const unsigned char*)aSrc;
   971    return NS_ERROR_UNEXPECTED;
   972 error3:
   973    *aDestLen = dest - aDest;
   974    *aSrcLen = src - (const unsigned char*)aSrc;
   975    return NS_ERROR_ILLEGAL_INPUT;
   976 }

mercurial