intl/uconv/ucvja/nsUnicodeToISO2022JP.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsUnicodeToISO2022JP.h"
michael@0 7 #include "nsUCVJADll.h"
michael@0 8 #include "nsUnicodeEncodeHelper.h"
michael@0 9
michael@0 10 //----------------------------------------------------------------------
michael@0 11 // Global functions and data [declaration]
michael@0 12
// Half-width (Hankaku) to full-width (Zenkaku) lookup table.
// Entry n is the full-width counterpart of code point 0xff60 + n, so the
// table is indexed with (ch - 0xff60).  Nigori and Maru combinations are not
// stored here; they are resolved by the code that uses this table.
static const char16_t gBasicMapping[0x40] = {
  0xff60, 0x3002, 0x300c, 0x300d, 0x3001, 0x30fb, 0x30f2, 0x30a1,  // 0xff60 - 0xff67
  0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5, 0x30e7, 0x30c3,  // 0xff68 - 0xff6f
  0x30fc, 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab, 0x30ad,  // 0xff70 - 0xff77
  0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd,  // 0xff78 - 0xff7f
  0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc,  // 0xff80 - 0xff87
  0x30cd, 0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de,  // 0xff88 - 0xff8f
  0x30df, 0x30e0, 0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9,  // 0xff90 - 0xff97
  0x30ea, 0x30eb, 0x30ec, 0x30ed, 0x30ef, 0x30f3, 0x309b, 0x309c   // 0xff98 - 0xff9f
};
michael@0 34
// May the half-width character u be followed by a Nigori mark?
#define NEED_TO_CHECK_NIGORI(u) \
  ((((u) >= 0xff76) && ((u) <= 0xff84)) || (((u) >= 0xff8a) && ((u) <= 0xff8e)))

// May the half-width character u be followed by a Maru mark?
#define NEED_TO_CHECK_MARU(u) (((u) >= 0xff8a) && ((u) <= 0xff8e))

// True when u lies in the half-width (Hankaku) range 0xff61 - 0xff9f
#define IS_HANKAKU(u) (((u) >= 0xff61) && ((u) <= 0xff9f))
// True when u is the half-width Nigori / Maru mark itself
#define IS_NIGORI(u) ((u) == 0xff9e)
#define IS_MARU(u) ((u) == 0xff9f)
// Amount added to a basic full-width code to obtain the Nigori / Maru form
#define NIGORI_MODIFIER 1
#define MARU_MODIFIER 2
michael@0 47
// Mapping data for the ASCII charset (entry 0 of g_ufMappingTables).
// NOTE(review): presumably in the serialized uMappingTable layout, since the
// entries of g_ufMappingTables are cast to uMappingTable* before being
// passed to nsUnicodeEncodeHelper::ConvertByTable -- confirm against the
// table format definition.
static const uint16_t g_ufAsciiMapping[] = {
  0x0001, 0x0004, 0x0005, 0x0008,
  0x0000, 0x0000, 0x007F, 0x0000
};
michael@0 51
michael@0 52 #define SIZE_OF_ISO2022JP_TABLES 5
michael@0 53 static const uint16_t * g_ufMappingTables[SIZE_OF_ISO2022JP_TABLES] = {
michael@0 54 g_ufAsciiMapping, // ASCII ISOREG 6
michael@0 55 g_uf0201GLMapping, // JIS X 0201-1976 ISOREG 14
michael@0 56 g_uf0208Mapping, // JIS X 0208-1983 ISOREG 87
michael@0 57 g_uf0208extMapping, // JIS X 0208 - cp932 ext
michael@0 58 g_uf0208Mapping, // JIS X 0208-1978 ISOREG 42
michael@0 59 };
michael@0 60
michael@0 61 static const uScanClassID g_ufScanClassIDs[SIZE_OF_ISO2022JP_TABLES] = {
michael@0 62 u1ByteCharset, // ASCII ISOREG 6
michael@0 63 u1ByteCharset, // JIS X 0201-1976 ISOREG 14
michael@0 64 u2BytesCharset, // JIS X 0208-1983 ISOREG 87
michael@0 65 u2BytesCharset, // JIS X 0208- cp932 ext
michael@0 66 u2BytesCharset, // JIS X 0208-1978 ISOREG 42
michael@0 67 };
michael@0 68 #define JIS_X_208_INDEX 2
michael@0 69
michael@0 70 //----------------------------------------------------------------------
michael@0 71 // Class nsUnicodeToISO2022JP [implementation]
michael@0 72
michael@0 73 // worst case max length:
michael@0 74 // 1 2 3 4 5 6 7 8
michael@0 75 // ESC $ B XX XX ESC ( B
michael@0 76 nsUnicodeToISO2022JP::nsUnicodeToISO2022JP()
michael@0 77 : nsEncoderSupport(8)
michael@0 78 {
michael@0 79 Reset();
michael@0 80 }
michael@0 81
michael@0 82 nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP()
michael@0 83 {
michael@0 84 }
michael@0 85
michael@0 86 nsresult nsUnicodeToISO2022JP::ChangeCharset(int32_t aCharset,
michael@0 87 char * aDest,
michael@0 88 int32_t * aDestLength)
michael@0 89 {
michael@0 90 // both 2 and 3 generate the same escape sequence. 2 is for
michael@0 91 // the standard JISx0208 table, and 3 is for theCP932 extensions
michael@0 92 // therefore, we treat them as the same one.
michael@0 93 if(((2 == aCharset) && ( 3 == mCharset)) ||
michael@0 94 ((3 == aCharset) && ( 2 == mCharset)) )
michael@0 95 {
michael@0 96 mCharset = aCharset;
michael@0 97 }
michael@0 98
michael@0 99 if(aCharset == mCharset)
michael@0 100 {
michael@0 101 *aDestLength = 0;
michael@0 102 return NS_OK;
michael@0 103 }
michael@0 104
michael@0 105 if (*aDestLength < 3) {
michael@0 106 *aDestLength = 0;
michael@0 107 return NS_OK_UENC_MOREOUTPUT;
michael@0 108 }
michael@0 109
michael@0 110 switch (aCharset) {
michael@0 111 case 0: // ASCII ISOREG 6
michael@0 112 aDest[0] = 0x1b;
michael@0 113 aDest[1] = '(';
michael@0 114 aDest[2] = 'B';
michael@0 115 break;
michael@0 116 case 1: // JIS X 0201-1976 ("Roman" set) ISOREG 14
michael@0 117 aDest[0] = 0x1b;
michael@0 118 aDest[1] = '(';
michael@0 119 aDest[2] = 'J';
michael@0 120 break;
michael@0 121 case 2: // JIS X 0208-1983 ISOREG 87
michael@0 122 case 3: // JIS X 0208-1983
michael@0 123 // we currently use this for CP932 ext
michael@0 124 aDest[0] = 0x1b;
michael@0 125 aDest[1] = '$';
michael@0 126 aDest[2] = 'B';
michael@0 127 break;
michael@0 128 case 4: // JIS X 0201-1978 ISOREG 87-
michael@0 129 // we currently do not have a diff mapping for it.
michael@0 130 aDest[0] = 0x1b;
michael@0 131 aDest[1] = '$';
michael@0 132 aDest[2] = '@';
michael@0 133 break;
michael@0 134 }
michael@0 135
michael@0 136 mCharset = aCharset;
michael@0 137 *aDestLength = 3;
michael@0 138 return NS_OK;
michael@0 139 }
michael@0 140
michael@0 141 nsresult nsUnicodeToISO2022JP::ConvertHankaku(const char16_t * aSrc,
michael@0 142 int32_t * aSrcLength,
michael@0 143 char * aDest,
michael@0 144 int32_t * aDestLength)
michael@0 145 {
michael@0 146 nsresult res = NS_OK;
michael@0 147
michael@0 148 const char16_t * src = aSrc;
michael@0 149 const char16_t * srcEnd = aSrc + *aSrcLength;
michael@0 150 char * dest = aDest;
michael@0 151 char * destEnd = aDest + *aDestLength;
michael@0 152 char16_t srcChar, tempChar;
michael@0 153 int32_t bcr, bcw;
michael@0 154
michael@0 155 bcw = destEnd - dest;
michael@0 156 res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw);
michael@0 157 dest += bcw;
michael@0 158 if (res != NS_OK) {
michael@0 159 return res;
michael@0 160 }
michael@0 161
michael@0 162 while (src < srcEnd) {
michael@0 163 srcChar = *src;
michael@0 164 if (!IS_HANKAKU(srcChar)) {
michael@0 165 break;
michael@0 166 }
michael@0 167 ++src;
michael@0 168 tempChar = gBasicMapping[(srcChar) - 0xff60];
michael@0 169
michael@0 170 if (src < srcEnd) {
michael@0 171 // if the character could take a modifier, and the next char
michael@0 172 // is a modifier, modify it and eat one char16_t
michael@0 173 if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) {
michael@0 174 tempChar += NIGORI_MODIFIER;
michael@0 175 ++src;
michael@0 176 } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) {
michael@0 177 tempChar += MARU_MODIFIER;
michael@0 178 ++src;
michael@0 179 }
michael@0 180 }
michael@0 181 bcr = 1;
michael@0 182 bcw = destEnd - dest;
michael@0 183 res = nsUnicodeEncodeHelper::ConvertByTable(
michael@0 184 &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX],
michael@0 185 nullptr, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]);
michael@0 186 dest += bcw;
michael@0 187 if (res != NS_OK)
michael@0 188 break;
michael@0 189 }
michael@0 190 *aDestLength = dest - aDest;
michael@0 191 *aSrcLength = src - aSrc;
michael@0 192 return res;
michael@0 193 }
michael@0 194
michael@0 195 //----------------------------------------------------------------------
michael@0 196 // Subclassing of nsEncoderSupport class [implementation]
michael@0 197
michael@0 198 NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr(
michael@0 199 const char16_t * aSrc,
michael@0 200 int32_t * aSrcLength,
michael@0 201 char * aDest,
michael@0 202 int32_t * aDestLength)
michael@0 203 {
michael@0 204 nsresult res = NS_OK;
michael@0 205
michael@0 206 const char16_t * src = aSrc;
michael@0 207 const char16_t * srcEnd = aSrc + *aSrcLength;
michael@0 208 char * dest = aDest;
michael@0 209 char * destEnd = aDest + *aDestLength;
michael@0 210 int32_t bcr, bcw;
michael@0 211 int32_t i;
michael@0 212
michael@0 213 while (src < srcEnd) {
michael@0 214 for (i=0; i< SIZE_OF_ISO2022JP_TABLES ; i++) {
michael@0 215 bcr = 1;
michael@0 216 bcw = destEnd - dest;
michael@0 217 res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw,
michael@0 218 g_ufScanClassIDs[i], nullptr,
michael@0 219 (uMappingTable *) g_ufMappingTables[i]);
michael@0 220 if (res != NS_ERROR_UENC_NOMAPPING) break;
michael@0 221 }
michael@0 222
michael@0 223 if ( i>= SIZE_OF_ISO2022JP_TABLES) {
michael@0 224 if (IS_HANKAKU(*src)) {
michael@0 225 bcr = srcEnd - src;
michael@0 226 bcw = destEnd - dest;
michael@0 227 res = ConvertHankaku(src, &bcr, dest, &bcw);
michael@0 228 dest += bcw;
michael@0 229 src += bcr;
michael@0 230 if (res == NS_OK) continue;
michael@0 231 } else {
michael@0 232 res = NS_ERROR_UENC_NOMAPPING;
michael@0 233 src++;
michael@0 234 }
michael@0 235 }
michael@0 236 if (res != NS_OK) break;
michael@0 237
michael@0 238 bcw = destEnd - dest;
michael@0 239 res = ChangeCharset(i, dest, &bcw);
michael@0 240 dest += bcw;
michael@0 241 if (res != NS_OK) break;
michael@0 242
michael@0 243 bcr = srcEnd - src;
michael@0 244 bcw = destEnd - dest;
michael@0 245 res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw,
michael@0 246 g_ufScanClassIDs[i], nullptr,
michael@0 247 (uMappingTable *) g_ufMappingTables[i]);
michael@0 248 src += bcr;
michael@0 249 dest += bcw;
michael@0 250
michael@0 251 if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break;
michael@0 252 if (res == NS_ERROR_UENC_NOMAPPING) src--;
michael@0 253 }
michael@0 254
michael@0 255 *aSrcLength = src - aSrc;
michael@0 256 *aDestLength = dest - aDest;
michael@0 257 return res;
michael@0 258 }
michael@0 259
michael@0 260 NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest,
michael@0 261 int32_t * aDestLength)
michael@0 262 {
michael@0 263 ChangeCharset(0, aDest, aDestLength);
michael@0 264 return NS_OK;
michael@0 265 }
michael@0 266
michael@0 267 NS_IMETHODIMP nsUnicodeToISO2022JP::Reset()
michael@0 268 {
michael@0 269 mCharset = 0;
michael@0 270 return nsEncoderSupport::Reset();
michael@0 271 }

mercurial