intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,271 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +#include "nsUnicodeToISO2022JP.h"
    1.10 +#include "nsUCVJADll.h"
    1.11 +#include "nsUnicodeEncodeHelper.h"
    1.12 +
    1.13 +//----------------------------------------------------------------------
    1.14 +// Global functions and data [declaration]
    1.15 +
    1.16 +// Basic mapping from Hankaku to Zenkaku
    1.17 +// Nigori and Maru are taken care of outside this basic mapping
    1.18 +static const char16_t gBasicMapping[0x40] =
    1.19 +{
    1.20 +// 0xff60
    1.21 +0xff60,0x3002,0x300c,0x300d,0x3001,0x30fb,0x30f2,0x30a1,
    1.22 +// 0xff68
    1.23 +0x30a3,0x30a5,0x30a7,0x30a9,0x30e3,0x30e5,0x30e7,0x30c3,
    1.24 +// 0xff70
    1.25 +0x30fc,0x30a2,0x30a4,0x30a6,0x30a8,0x30aa,0x30ab,0x30ad,
    1.26 +// 0xff78
    1.27 +0x30af,0x30b1,0x30b3,0x30b5,0x30b7,0x30b9,0x30bb,0x30bd,
    1.28 +// 0xff80
    1.29 +0x30bf,0x30c1,0x30c4,0x30c6,0x30c8,0x30ca,0x30cb,0x30cc,
    1.30 +// 0xff88
    1.31 +0x30cd,0x30ce,0x30cf,0x30d2,0x30d5,0x30d8,0x30db,0x30de,
    1.32 +// 0xff90
    1.33 +0x30df,0x30e0,0x30e1,0x30e2,0x30e4,0x30e6,0x30e8,0x30e9,
    1.34 +// 0xff98
    1.35 +0x30ea,0x30eb,0x30ec,0x30ed,0x30ef,0x30f3,0x309b,0x309c
    1.36 +};
    1.37 +
    1.38 +// Do we need to check for Nigori for the next unicode ?
    1.39 +#define NEED_TO_CHECK_NIGORI(u) (((0xff76<=(u))&&((u)<=0xff84))||((0xff8a<=(u))&&((u)<=0xff8e)))
    1.40 +
    1.41 +// Do we need to check for Maru for the next unicode ?
    1.42 +#define NEED_TO_CHECK_MARU(u) ((0xff8a<=(u))&&((u)<=0xff8e))
    1.43 +
    1.44 +// The  unicode is in Katakana Hankaku block
    1.45 +#define IS_HANKAKU(u) ((0xff61 <= (u)) && ((u) <= 0xff9f))
    1.46 +#define IS_NIGORI(u) (0xff9e == (u))
    1.47 +#define IS_MARU(u)   (0xff9f == (u))
    1.48 +#define NIGORI_MODIFIER 1
    1.49 +#define MARU_MODIFIER   2
    1.50 +
    1.51 +static const uint16_t g_ufAsciiMapping [] = {
    1.52 +  0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0x007F, 0x0000
    1.53 +};
    1.54 +
    1.55 +#define SIZE_OF_ISO2022JP_TABLES 5
    1.56 +static const uint16_t * g_ufMappingTables[SIZE_OF_ISO2022JP_TABLES] = {
    1.57 +  g_ufAsciiMapping,             // ASCII           ISOREG 6
    1.58 +  g_uf0201GLMapping,            // JIS X 0201-1976 ISOREG 14
    1.59 +  g_uf0208Mapping,              // JIS X 0208-1983 ISOREG 87
    1.60 +  g_uf0208extMapping,           // JIS X 0208 - cp932 ext
    1.61 +  g_uf0208Mapping,              // JIS X 0208-1978 ISOREG 42
    1.62 +};
    1.63 +
    1.64 +static const uScanClassID g_ufScanClassIDs[SIZE_OF_ISO2022JP_TABLES] = {
    1.65 +  u1ByteCharset,                // ASCII           ISOREG 6
    1.66 +  u1ByteCharset,                // JIS X 0201-1976 ISOREG 14
    1.67 +  u2BytesCharset,               // JIS X 0208-1983 ISOREG 87
    1.68 +  u2BytesCharset,               // JIS X 0208- cp932 ext
    1.69 +  u2BytesCharset,               // JIS X 0208-1978 ISOREG 42
    1.70 +};
    1.71 +#define JIS_X_208_INDEX 2
    1.72 +
    1.73 +//----------------------------------------------------------------------
    1.74 +// Class nsUnicodeToISO2022JP [implementation]
    1.75 +
    1.76 +// worst case max length: 
    1.77 +//  1  2 3  4  5  6  7 8
    1.78 +// ESC $ B XX XX ESC ( B
    1.79 +nsUnicodeToISO2022JP::nsUnicodeToISO2022JP() 
    1.80 +: nsEncoderSupport(8)
    1.81 +{
    1.82 +  Reset();
    1.83 +}
    1.84 +
    1.85 +nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP() 
    1.86 +{
    1.87 +}
    1.88 +
    1.89 +nsresult nsUnicodeToISO2022JP::ChangeCharset(int32_t aCharset,
    1.90 +                                             char * aDest, 
    1.91 +                                             int32_t * aDestLength)
    1.92 +{
    1.93 +  // both 2 and 3 generate the same escape sequence. 2 is for
    1.94 +  // the standard JISx0208 table, and 3 is for theCP932 extensions
    1.95 +  // therefore, we treat them as the same one.
    1.96 +  if(((2 == aCharset) && ( 3 == mCharset)) ||
    1.97 +     ((3 == aCharset) && ( 2 == mCharset)) )
    1.98 +  {
    1.99 +    mCharset = aCharset;
   1.100 +  }
   1.101 +
   1.102 +  if(aCharset == mCharset) 
   1.103 +  {
   1.104 +    *aDestLength = 0;
   1.105 +    return NS_OK;
   1.106 +  } 
   1.107 +  
   1.108 +  if (*aDestLength < 3) {
   1.109 +    *aDestLength = 0;
   1.110 +    return NS_OK_UENC_MOREOUTPUT;
   1.111 +  }
   1.112 +
   1.113 +  switch (aCharset) {
   1.114 +    case 0: // ASCII ISOREG 6
   1.115 +      aDest[0] = 0x1b;
   1.116 +      aDest[1] = '(';
   1.117 +      aDest[2] = 'B';
   1.118 +      break;
   1.119 +    case 1: // JIS X 0201-1976 ("Roman" set) ISOREG 14
   1.120 +      aDest[0] = 0x1b;
   1.121 +      aDest[1] = '(';
   1.122 +      aDest[2] = 'J';
   1.123 +      break;
   1.124 +    case 2: // JIS X 0208-1983 ISOREG 87
   1.125 +    case 3: // JIS X 0208-1983 
   1.126 +            // we currently use this for CP932 ext
   1.127 +      aDest[0] = 0x1b;
   1.128 +      aDest[1] = '$';
   1.129 +      aDest[2] = 'B';
   1.130 +      break;
   1.131 +    case 4: // JIS X 0201-1978 ISOREG 87- 
   1.132 +            // we currently do not have a diff mapping for it.
   1.133 +      aDest[0] = 0x1b;
   1.134 +      aDest[1] = '$';
   1.135 +      aDest[2] = '@';
   1.136 +      break;
   1.137 +  }
   1.138 +
   1.139 +  mCharset = aCharset;
   1.140 +  *aDestLength = 3;
   1.141 +  return NS_OK;
   1.142 +}
   1.143 +
   1.144 +nsresult nsUnicodeToISO2022JP::ConvertHankaku(const char16_t * aSrc,
   1.145 +                                              int32_t * aSrcLength,
   1.146 +                                              char * aDest,
   1.147 +                                              int32_t * aDestLength)
   1.148 +{
   1.149 +  nsresult res = NS_OK;
   1.150 +
   1.151 +  const char16_t * src = aSrc;
   1.152 +  const char16_t * srcEnd = aSrc + *aSrcLength;
   1.153 +  char * dest = aDest;
   1.154 +  char * destEnd = aDest + *aDestLength;
   1.155 +  char16_t srcChar, tempChar;
   1.156 +  int32_t bcr, bcw;
   1.157 +
   1.158 +  bcw = destEnd - dest;
   1.159 +  res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw);
   1.160 +  dest += bcw;
   1.161 +  if (res != NS_OK) {
   1.162 +    return res;
   1.163 +  }
   1.164 +
   1.165 +  while (src < srcEnd) {
   1.166 +    srcChar = *src;
   1.167 +    if (!IS_HANKAKU(srcChar)) {
   1.168 +      break;
   1.169 +    }
   1.170 +    ++src;
   1.171 +    tempChar = gBasicMapping[(srcChar) - 0xff60];
   1.172 +
   1.173 +    if (src < srcEnd) {
   1.174 +      // if the character could take a modifier, and the next char
   1.175 +      // is a modifier, modify it and eat one char16_t
   1.176 +      if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) {
   1.177 +        tempChar += NIGORI_MODIFIER;
   1.178 +        ++src;
   1.179 +      } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) {
   1.180 +        tempChar += MARU_MODIFIER;
   1.181 +        ++src;
   1.182 +      }
   1.183 +    }
   1.184 +    bcr = 1;
   1.185 +    bcw = destEnd - dest;
   1.186 +    res = nsUnicodeEncodeHelper::ConvertByTable(
   1.187 +             &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX],
   1.188 +             nullptr, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]);
   1.189 +    dest += bcw;
   1.190 +    if (res != NS_OK)
   1.191 +      break;
   1.192 +  }
   1.193 +  *aDestLength = dest - aDest;
   1.194 +  *aSrcLength = src - aSrc;
   1.195 +  return res;
   1.196 +}
   1.197 +
   1.198 +//----------------------------------------------------------------------
   1.199 +// Subclassing of nsEncoderSupport class [implementation]
   1.200 +
   1.201 +NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr(
   1.202 +                                    const char16_t * aSrc, 
   1.203 +                                    int32_t * aSrcLength, 
   1.204 +                                    char * aDest, 
   1.205 +                                    int32_t * aDestLength)
   1.206 +{
   1.207 +  nsresult res = NS_OK;
   1.208 +
   1.209 +  const char16_t * src = aSrc;
   1.210 +  const char16_t * srcEnd = aSrc + *aSrcLength;
   1.211 +  char * dest = aDest;
   1.212 +  char * destEnd = aDest + *aDestLength;
   1.213 +  int32_t bcr, bcw;
   1.214 +  int32_t i;
   1.215 +
   1.216 +  while (src < srcEnd) {
   1.217 +    for (i=0; i< SIZE_OF_ISO2022JP_TABLES ; i++) {
   1.218 +      bcr = 1;
   1.219 +      bcw = destEnd - dest;
   1.220 +      res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 
   1.221 +                                      g_ufScanClassIDs[i], nullptr,
   1.222 +                                      (uMappingTable *) g_ufMappingTables[i]);
   1.223 +      if (res != NS_ERROR_UENC_NOMAPPING) break;
   1.224 +    }
   1.225 +
   1.226 +    if ( i>=  SIZE_OF_ISO2022JP_TABLES) {
   1.227 +      if (IS_HANKAKU(*src)) {
   1.228 +        bcr = srcEnd - src;
   1.229 +        bcw = destEnd - dest;
   1.230 +        res = ConvertHankaku(src, &bcr, dest, &bcw);
   1.231 +        dest += bcw;
   1.232 +        src += bcr;
   1.233 +        if (res == NS_OK) continue;
   1.234 +      } else {
   1.235 +        res = NS_ERROR_UENC_NOMAPPING;
   1.236 +        src++;
   1.237 +      }
   1.238 +    }
   1.239 +    if (res != NS_OK) break;
   1.240 +
   1.241 +    bcw = destEnd - dest;
   1.242 +    res = ChangeCharset(i, dest, &bcw);
   1.243 +    dest += bcw;
   1.244 +    if (res != NS_OK) break;
   1.245 +
   1.246 +    bcr = srcEnd - src;
   1.247 +    bcw = destEnd - dest;
   1.248 +    res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 
   1.249 +                                      g_ufScanClassIDs[i], nullptr,
   1.250 +                                      (uMappingTable *) g_ufMappingTables[i]);
   1.251 +    src += bcr;
   1.252 +    dest += bcw;
   1.253 +
   1.254 +    if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break;
   1.255 +    if (res == NS_ERROR_UENC_NOMAPPING) src--;
   1.256 +  }
   1.257 +
   1.258 +  *aSrcLength = src - aSrc;
   1.259 +  *aDestLength  = dest - aDest;
   1.260 +  return res;
   1.261 +}
   1.262 +
   1.263 +NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest, 
   1.264 +                                                 int32_t * aDestLength)
   1.265 +{
   1.266 +  ChangeCharset(0, aDest, aDestLength);
   1.267 +  return NS_OK;
   1.268 +}
   1.269 +
   1.270 +NS_IMETHODIMP nsUnicodeToISO2022JP::Reset()
   1.271 +{
   1.272 +  mCharset = 0;
   1.273 +  return nsEncoderSupport::Reset();
   1.274 +}

mercurial