1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,271 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +#include "nsUnicodeToISO2022JP.h" 1.10 +#include "nsUCVJADll.h" 1.11 +#include "nsUnicodeEncodeHelper.h" 1.12 + 1.13 +//---------------------------------------------------------------------- 1.14 +// Global functions and data [declaration] 1.15 + 1.16 +// Basic mapping from Hankaku to Zenkaku 1.17 +// Nigori and Maru are taken care of outside this basic mapping 1.18 +static const char16_t gBasicMapping[0x40] = 1.19 +{ 1.20 +// 0xff60 1.21 +0xff60,0x3002,0x300c,0x300d,0x3001,0x30fb,0x30f2,0x30a1, 1.22 +// 0xff68 1.23 +0x30a3,0x30a5,0x30a7,0x30a9,0x30e3,0x30e5,0x30e7,0x30c3, 1.24 +// 0xff70 1.25 +0x30fc,0x30a2,0x30a4,0x30a6,0x30a8,0x30aa,0x30ab,0x30ad, 1.26 +// 0xff78 1.27 +0x30af,0x30b1,0x30b3,0x30b5,0x30b7,0x30b9,0x30bb,0x30bd, 1.28 +// 0xff80 1.29 +0x30bf,0x30c1,0x30c4,0x30c6,0x30c8,0x30ca,0x30cb,0x30cc, 1.30 +// 0xff88 1.31 +0x30cd,0x30ce,0x30cf,0x30d2,0x30d5,0x30d8,0x30db,0x30de, 1.32 +// 0xff90 1.33 +0x30df,0x30e0,0x30e1,0x30e2,0x30e4,0x30e6,0x30e8,0x30e9, 1.34 +// 0xff98 1.35 +0x30ea,0x30eb,0x30ec,0x30ed,0x30ef,0x30f3,0x309b,0x309c 1.36 +}; 1.37 + 1.38 +// Do we need to check for Nigori for the next unicode ? 1.39 +#define NEED_TO_CHECK_NIGORI(u) (((0xff76<=(u))&&((u)<=0xff84))||((0xff8a<=(u))&&((u)<=0xff8e))) 1.40 + 1.41 +// Do we need to check for Maru for the next unicode ? 1.42 +#define NEED_TO_CHECK_MARU(u) ((0xff8a<=(u))&&((u)<=0xff8e)) 1.43 + 1.44 +// The unicode is in Katakana Hankaku block 1.45 +#define IS_HANKAKU(u) ((0xff61 <= (u)) && ((u) <= 0xff9f)) 1.46 +#define IS_NIGORI(u) (0xff9e == (u)) 1.47 +#define IS_MARU(u) (0xff9f == (u)) 1.48 +#define NIGORI_MODIFIER 1 1.49 +#define MARU_MODIFIER 2 1.50 + 1.51 +static const uint16_t g_ufAsciiMapping [] = { 1.52 + 0x0001, 0x0004, 0x0005, 0x0008, 0x0000, 0x0000, 0x007F, 0x0000 1.53 +}; 1.54 + 1.55 +#define SIZE_OF_ISO2022JP_TABLES 5 1.56 +static const uint16_t * g_ufMappingTables[SIZE_OF_ISO2022JP_TABLES] = { 1.57 + g_ufAsciiMapping, // ASCII ISOREG 6 1.58 + g_uf0201GLMapping, // JIS X 0201-1976 ISOREG 14 1.59 + g_uf0208Mapping, // JIS X 0208-1983 ISOREG 87 1.60 + g_uf0208extMapping, // JIS X 0208 - cp932 ext 1.61 + g_uf0208Mapping, // JIS X 0208-1978 ISOREG 42 1.62 +}; 1.63 + 1.64 +static const uScanClassID g_ufScanClassIDs[SIZE_OF_ISO2022JP_TABLES] = { 1.65 + u1ByteCharset, // ASCII ISOREG 6 1.66 + u1ByteCharset, // JIS X 0201-1976 ISOREG 14 1.67 + u2BytesCharset, // JIS X 0208-1983 ISOREG 87 1.68 + u2BytesCharset, // JIS X 0208- cp932 ext 1.69 + u2BytesCharset, // JIS X 0208-1978 ISOREG 42 1.70 +}; 1.71 +#define JIS_X_208_INDEX 2 1.72 + 1.73 +//---------------------------------------------------------------------- 1.74 +// Class nsUnicodeToISO2022JP [implementation] 1.75 + 1.76 +// worst case max length: 1.77 +// 1 2 3 4 5 6 7 8 1.78 +// ESC $ B XX XX ESC ( B 1.79 +nsUnicodeToISO2022JP::nsUnicodeToISO2022JP() 1.80 +: nsEncoderSupport(8) 1.81 +{ 1.82 + Reset(); 1.83 +} 1.84 + 1.85 +nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP() 1.86 +{ 1.87 +} 1.88 + 1.89 +nsresult nsUnicodeToISO2022JP::ChangeCharset(int32_t aCharset, 1.90 + char * aDest, 1.91 + int32_t * aDestLength) 1.92 +{ 1.93 + // both 2 and 3 generate the same escape sequence. 2 is for 1.94 + // the standard JISx0208 table, and 3 is for theCP932 extensions 1.95 + // therefore, we treat them as the same one. 1.96 + if(((2 == aCharset) && ( 3 == mCharset)) || 1.97 + ((3 == aCharset) && ( 2 == mCharset)) ) 1.98 + { 1.99 + mCharset = aCharset; 1.100 + } 1.101 + 1.102 + if(aCharset == mCharset) 1.103 + { 1.104 + *aDestLength = 0; 1.105 + return NS_OK; 1.106 + } 1.107 + 1.108 + if (*aDestLength < 3) { 1.109 + *aDestLength = 0; 1.110 + return NS_OK_UENC_MOREOUTPUT; 1.111 + } 1.112 + 1.113 + switch (aCharset) { 1.114 + case 0: // ASCII ISOREG 6 1.115 + aDest[0] = 0x1b; 1.116 + aDest[1] = '('; 1.117 + aDest[2] = 'B'; 1.118 + break; 1.119 + case 1: // JIS X 0201-1976 ("Roman" set) ISOREG 14 1.120 + aDest[0] = 0x1b; 1.121 + aDest[1] = '('; 1.122 + aDest[2] = 'J'; 1.123 + break; 1.124 + case 2: // JIS X 0208-1983 ISOREG 87 1.125 + case 3: // JIS X 0208-1983 1.126 + // we currently use this for CP932 ext 1.127 + aDest[0] = 0x1b; 1.128 + aDest[1] = '$'; 1.129 + aDest[2] = 'B'; 1.130 + break; 1.131 + case 4: // JIS X 0201-1978 ISOREG 87- 1.132 + // we currently do not have a diff mapping for it. 1.133 + aDest[0] = 0x1b; 1.134 + aDest[1] = '$'; 1.135 + aDest[2] = '@'; 1.136 + break; 1.137 + } 1.138 + 1.139 + mCharset = aCharset; 1.140 + *aDestLength = 3; 1.141 + return NS_OK; 1.142 +} 1.143 + 1.144 +nsresult nsUnicodeToISO2022JP::ConvertHankaku(const char16_t * aSrc, 1.145 + int32_t * aSrcLength, 1.146 + char * aDest, 1.147 + int32_t * aDestLength) 1.148 +{ 1.149 + nsresult res = NS_OK; 1.150 + 1.151 + const char16_t * src = aSrc; 1.152 + const char16_t * srcEnd = aSrc + *aSrcLength; 1.153 + char * dest = aDest; 1.154 + char * destEnd = aDest + *aDestLength; 1.155 + char16_t srcChar, tempChar; 1.156 + int32_t bcr, bcw; 1.157 + 1.158 + bcw = destEnd - dest; 1.159 + res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw); 1.160 + dest += bcw; 1.161 + if (res != NS_OK) { 1.162 + return res; 1.163 + } 1.164 + 1.165 + while (src < srcEnd) { 1.166 + srcChar = *src; 1.167 + if (!IS_HANKAKU(srcChar)) { 1.168 + break; 1.169 + } 1.170 + ++src; 1.171 + tempChar = gBasicMapping[(srcChar) - 0xff60]; 1.172 + 1.173 + if (src < srcEnd) { 1.174 + // if the character could take a modifier, and the next char 1.175 + // is a modifier, modify it and eat one char16_t 1.176 + if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) { 1.177 + tempChar += NIGORI_MODIFIER; 1.178 + ++src; 1.179 + } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) { 1.180 + tempChar += MARU_MODIFIER; 1.181 + ++src; 1.182 + } 1.183 + } 1.184 + bcr = 1; 1.185 + bcw = destEnd - dest; 1.186 + res = nsUnicodeEncodeHelper::ConvertByTable( 1.187 + &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX], 1.188 + nullptr, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]); 1.189 + dest += bcw; 1.190 + if (res != NS_OK) 1.191 + break; 1.192 + } 1.193 + *aDestLength = dest - aDest; 1.194 + *aSrcLength = src - aSrc; 1.195 + return res; 1.196 +} 1.197 + 1.198 +//---------------------------------------------------------------------- 1.199 +// Subclassing of nsTableEncoderSupport class [implementation] 1.200 + 1.201 +NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr( 1.202 + const char16_t * aSrc, 1.203 + int32_t * aSrcLength, 1.204 + char * aDest, 1.205 + int32_t * aDestLength) 1.206 +{ 1.207 + nsresult res = NS_OK; 1.208 + 1.209 + const char16_t * src = aSrc; 1.210 + const char16_t * srcEnd = aSrc + *aSrcLength; 1.211 + char * dest = aDest; 1.212 + char * destEnd = aDest + *aDestLength; 1.213 + int32_t bcr, bcw; 1.214 + int32_t i; 1.215 + 1.216 + while (src < srcEnd) { 1.217 + for (i=0; i< SIZE_OF_ISO2022JP_TABLES ; i++) { 1.218 + bcr = 1; 1.219 + bcw = destEnd - dest; 1.220 + res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 1.221 + g_ufScanClassIDs[i], nullptr, 1.222 + (uMappingTable *) g_ufMappingTables[i]); 1.223 + if (res != NS_ERROR_UENC_NOMAPPING) break; 1.224 + } 1.225 + 1.226 + if ( i>= SIZE_OF_ISO2022JP_TABLES) { 1.227 + if (IS_HANKAKU(*src)) { 1.228 + bcr = srcEnd - src; 1.229 + bcw = destEnd - dest; 1.230 + res = ConvertHankaku(src, &bcr, dest, &bcw); 1.231 + dest += bcw; 1.232 + src += bcr; 1.233 + if (res == NS_OK) continue; 1.234 + } else { 1.235 + res = NS_ERROR_UENC_NOMAPPING; 1.236 + src++; 1.237 + } 1.238 + } 1.239 + if (res != NS_OK) break; 1.240 + 1.241 + bcw = destEnd - dest; 1.242 + res = ChangeCharset(i, dest, &bcw); 1.243 + dest += bcw; 1.244 + if (res != NS_OK) break; 1.245 + 1.246 + bcr = srcEnd - src; 1.247 + bcw = destEnd - dest; 1.248 + res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, 1.249 + g_ufScanClassIDs[i], nullptr, 1.250 + (uMappingTable *) g_ufMappingTables[i]); 1.251 + src += bcr; 1.252 + dest += bcw; 1.253 + 1.254 + if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break; 1.255 + if (res == NS_ERROR_UENC_NOMAPPING) src--; 1.256 + } 1.257 + 1.258 + *aSrcLength = src - aSrc; 1.259 + *aDestLength = dest - aDest; 1.260 + return res; 1.261 +} 1.262 + 1.263 +NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest, 1.264 + int32_t * aDestLength) 1.265 +{ 1.266 + ChangeCharset(0, aDest, aDestLength); 1.267 + return NS_OK; 1.268 +} 1.269 + 1.270 +NS_IMETHODIMP nsUnicodeToISO2022JP::Reset() 1.271 +{ 1.272 + mCharset = 0; 1.273 + return nsEncoderSupport::Reset(); 1.274 +}