intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 #include "nsUnicodeToISO2022JP.h"
     7 #include "nsUCVJADll.h"
     8 #include "nsUnicodeEncodeHelper.h"
    10 //----------------------------------------------------------------------
    11 // Global functions and data [declaration]
    13 // Basic mapping from Hankaku to Zenkaku
    14 // Nigori and Maru are taken care of outside this basic mapping
    15 static const char16_t gBasicMapping[0x40] =
    16 {
    17 // 0xff60
    18 0xff60,0x3002,0x300c,0x300d,0x3001,0x30fb,0x30f2,0x30a1,
    19 // 0xff68
    20 0x30a3,0x30a5,0x30a7,0x30a9,0x30e3,0x30e5,0x30e7,0x30c3,
    21 // 0xff70
    22 0x30fc,0x30a2,0x30a4,0x30a6,0x30a8,0x30aa,0x30ab,0x30ad,
    23 // 0xff78
    24 0x30af,0x30b1,0x30b3,0x30b5,0x30b7,0x30b9,0x30bb,0x30bd,
    25 // 0xff80
    26 0x30bf,0x30c1,0x30c4,0x30c6,0x30c8,0x30ca,0x30cb,0x30cc,
    27 // 0xff88
    28 0x30cd,0x30ce,0x30cf,0x30d2,0x30d5,0x30d8,0x30db,0x30de,
    29 // 0xff90
    30 0x30df,0x30e0,0x30e1,0x30e2,0x30e4,0x30e6,0x30e8,0x30e9,
    31 // 0xff98
    32 0x30ea,0x30eb,0x30ec,0x30ed,0x30ef,0x30f3,0x309b,0x309c
    33 };
    35 // Do we need to check for Nigori for the next unicode ?
    36 #define NEED_TO_CHECK_NIGORI(u) (((0xff76<=(u))&&((u)<=0xff84))||((0xff8a<=(u))&&((u)<=0xff8e)))
    38 // Do we need to check for Maru for the next unicode ?
    39 #define NEED_TO_CHECK_MARU(u) ((0xff8a<=(u))&&((u)<=0xff8e))
    41 // The  unicode is in Katakana Hankaku block
    42 #define IS_HANKAKU(u) ((0xff61 <= (u)) && ((u) <= 0xff9f))
    43 #define IS_NIGORI(u) (0xff9e == (u))
    44 #define IS_MARU(u)   (0xff9f == (u))
    45 #define NIGORI_MODIFIER 1
    46 #define MARU_MODIFIER   2
    48 static const uint16_t g_ufAsciiMapping [] = {
    49   0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0x007F, 0x0000
    50 };
    52 #define SIZE_OF_ISO2022JP_TABLES 5
    53 static const uint16_t * g_ufMappingTables[SIZE_OF_ISO2022JP_TABLES] = {
    54   g_ufAsciiMapping,             // ASCII           ISOREG 6
    55   g_uf0201GLMapping,            // JIS X 0201-1976 ISOREG 14
    56   g_uf0208Mapping,              // JIS X 0208-1983 ISOREG 87
    57   g_uf0208extMapping,           // JIS X 0208 - cp932 ext
    58   g_uf0208Mapping,              // JIS X 0208-1978 ISOREG 42
    59 };
    61 static const uScanClassID g_ufScanClassIDs[SIZE_OF_ISO2022JP_TABLES] = {
    62   u1ByteCharset,                // ASCII           ISOREG 6
    63   u1ByteCharset,                // JIS X 0201-1976 ISOREG 14
    64   u2BytesCharset,               // JIS X 0208-1983 ISOREG 87
    65   u2BytesCharset,               // JIS X 0208- cp932 ext
    66   u2BytesCharset,               // JIS X 0208-1978 ISOREG 42
    67 };
    68 #define JIS_X_208_INDEX 2
    70 //----------------------------------------------------------------------
    71 // Class nsUnicodeToISO2022JP [implementation]
    73 // worst case max length: 
    74 //  1  2 3  4  5  6  7 8
    75 // ESC $ B XX XX ESC ( B
    76 nsUnicodeToISO2022JP::nsUnicodeToISO2022JP() 
    77 : nsEncoderSupport(8)
    78 {
    79   Reset();
    80 }
    82 nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP() 
    83 {
    84 }
    86 nsresult nsUnicodeToISO2022JP::ChangeCharset(int32_t aCharset,
    87                                              char * aDest, 
    88                                              int32_t * aDestLength)
    89 {
    90   // both 2 and 3 generate the same escape sequence. 2 is for
    91   // the standard JISx0208 table, and 3 is for theCP932 extensions
    92   // therefore, we treat them as the same one.
    93   if(((2 == aCharset) && ( 3 == mCharset)) ||
    94      ((3 == aCharset) && ( 2 == mCharset)) )
    95   {
    96     mCharset = aCharset;
    97   }
    99   if(aCharset == mCharset) 
   100   {
   101     *aDestLength = 0;
   102     return NS_OK;
   103   } 
   105   if (*aDestLength < 3) {
   106     *aDestLength = 0;
   107     return NS_OK_UENC_MOREOUTPUT;
   108   }
   110   switch (aCharset) {
   111     case 0: // ASCII ISOREG 6
   112       aDest[0] = 0x1b;
   113       aDest[1] = '(';
   114       aDest[2] = 'B';
   115       break;
   116     case 1: // JIS X 0201-1976 ("Roman" set) ISOREG 14
   117       aDest[0] = 0x1b;
   118       aDest[1] = '(';
   119       aDest[2] = 'J';
   120       break;
   121     case 2: // JIS X 0208-1983 ISOREG 87
   122     case 3: // JIS X 0208-1983 
   123             // we currently use this for CP932 ext
   124       aDest[0] = 0x1b;
   125       aDest[1] = '$';
   126       aDest[2] = 'B';
   127       break;
   128     case 4: // JIS X 0201-1978 ISOREG 87- 
   129             // we currently do not have a diff mapping for it.
   130       aDest[0] = 0x1b;
   131       aDest[1] = '$';
   132       aDest[2] = '@';
   133       break;
   134   }
   136   mCharset = aCharset;
   137   *aDestLength = 3;
   138   return NS_OK;
   139 }
   141 nsresult nsUnicodeToISO2022JP::ConvertHankaku(const char16_t * aSrc,
   142                                               int32_t * aSrcLength,
   143                                               char * aDest,
   144                                               int32_t * aDestLength)
   145 {
   146   nsresult res = NS_OK;
   148   const char16_t * src = aSrc;
   149   const char16_t * srcEnd = aSrc + *aSrcLength;
   150   char * dest = aDest;
   151   char * destEnd = aDest + *aDestLength;
   152   char16_t srcChar, tempChar;
   153   int32_t bcr, bcw;
   155   bcw = destEnd - dest;
   156   res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw);
   157   dest += bcw;
   158   if (res != NS_OK) {
   159     return res;
   160   }
   162   while (src < srcEnd) {
   163     srcChar = *src;
   164     if (!IS_HANKAKU(srcChar)) {
   165       break;
   166     }
   167     ++src;
   168     tempChar = gBasicMapping[(srcChar) - 0xff60];
   170     if (src < srcEnd) {
   171       // if the character could take a modifier, and the next char
   172       // is a modifier, modify it and eat one char16_t
   173       if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) {
   174         tempChar += NIGORI_MODIFIER;
   175         ++src;
   176       } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) {
   177         tempChar += MARU_MODIFIER;
   178         ++src;
   179       }
   180     }
   181     bcr = 1;
   182     bcw = destEnd - dest;
   183     res = nsUnicodeEncodeHelper::ConvertByTable(
   184              &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX],
   185              nullptr, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]);
   186     dest += bcw;
   187     if (res != NS_OK)
   188       break;
   189   }
   190   *aDestLength = dest - aDest;
   191   *aSrcLength = src - aSrc;
   192   return res;
   193 }
   195 //----------------------------------------------------------------------
   196 // Subclassing of nsTableEncoderSupport class [implementation]
   198 NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr(
   199                                     const char16_t * aSrc, 
   200                                     int32_t * aSrcLength, 
   201                                     char * aDest, 
   202                                     int32_t * aDestLength)
   203 {
   204   nsresult res = NS_OK;
   206   const char16_t * src = aSrc;
   207   const char16_t * srcEnd = aSrc + *aSrcLength;
   208   char * dest = aDest;
   209   char * destEnd = aDest + *aDestLength;
   210   int32_t bcr, bcw;
   211   int32_t i;
   213   while (src < srcEnd) {
   214     for (i=0; i< SIZE_OF_ISO2022JP_TABLES ; i++) {
   215       bcr = 1;
   216       bcw = destEnd - dest;
   217       res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 
   218                                       g_ufScanClassIDs[i], nullptr,
   219                                       (uMappingTable *) g_ufMappingTables[i]);
   220       if (res != NS_ERROR_UENC_NOMAPPING) break;
   221     }
   223     if ( i>=  SIZE_OF_ISO2022JP_TABLES) {
   224       if (IS_HANKAKU(*src)) {
   225         bcr = srcEnd - src;
   226         bcw = destEnd - dest;
   227         res = ConvertHankaku(src, &bcr, dest, &bcw);
   228         dest += bcw;
   229         src += bcr;
   230         if (res == NS_OK) continue;
   231       } else {
   232         res = NS_ERROR_UENC_NOMAPPING;
   233         src++;
   234       }
   235     }
   236     if (res != NS_OK) break;
   238     bcw = destEnd - dest;
   239     res = ChangeCharset(i, dest, &bcw);
   240     dest += bcw;
   241     if (res != NS_OK) break;
   243     bcr = srcEnd - src;
   244     bcw = destEnd - dest;
   245     res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 
   246                                       g_ufScanClassIDs[i], nullptr,
   247                                       (uMappingTable *) g_ufMappingTables[i]);
   248     src += bcr;
   249     dest += bcw;
   251     if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break;
   252     if (res == NS_ERROR_UENC_NOMAPPING) src--;
   253   }
   255   *aSrcLength = src - aSrc;
   256   *aDestLength  = dest - aDest;
   257   return res;
   258 }
   260 NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest, 
   261                                                  int32_t * aDestLength)
   262 {
   263   ChangeCharset(0, aDest, aDestLength);
   264   return NS_OK;
   265 }
   267 NS_IMETHODIMP nsUnicodeToISO2022JP::Reset()
   268 {
   269   mCharset = 0;
   270   return nsEncoderSupport::Reset();
   271 }

mercurial