Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate;
This solves Tor bug #9701, complying with disk avoidance documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #include "nsUnicodeToISO2022JP.h"
7 #include "nsUCVJADll.h"
8 #include "nsUnicodeEncodeHelper.h"
10 //----------------------------------------------------------------------
11 // Global functions and data [declaration]
// Half-width (Hankaku) to full-width (Zenkaku) lookup table.
//
// The table is indexed by (code unit - 0xff60); each row below covers four
// consecutive half-width code units. Entry 0 maps U+FF60 to itself.
// Voiced (Nigori) and semi-voiced (Maru) combinations are NOT encoded here;
// the caller adds a modifier offset to the value looked up in this table.
static const char16_t gBasicMapping[0x40] =
{
  0xFF60, 0x3002, 0x300C, 0x300D,  // 0xff60 - 0xff63
  0x3001, 0x30FB, 0x30F2, 0x30A1,  // 0xff64 - 0xff67
  0x30A3, 0x30A5, 0x30A7, 0x30A9,  // 0xff68 - 0xff6b
  0x30E3, 0x30E5, 0x30E7, 0x30C3,  // 0xff6c - 0xff6f
  0x30FC, 0x30A2, 0x30A4, 0x30A6,  // 0xff70 - 0xff73
  0x30A8, 0x30AA, 0x30AB, 0x30AD,  // 0xff74 - 0xff77
  0x30AF, 0x30B1, 0x30B3, 0x30B5,  // 0xff78 - 0xff7b
  0x30B7, 0x30B9, 0x30BB, 0x30BD,  // 0xff7c - 0xff7f
  0x30BF, 0x30C1, 0x30C4, 0x30C6,  // 0xff80 - 0xff83
  0x30C8, 0x30CA, 0x30CB, 0x30CC,  // 0xff84 - 0xff87
  0x30CD, 0x30CE, 0x30CF, 0x30D2,  // 0xff88 - 0xff8b
  0x30D5, 0x30D8, 0x30DB, 0x30DE,  // 0xff8c - 0xff8f
  0x30DF, 0x30E0, 0x30E1, 0x30E2,  // 0xff90 - 0xff93
  0x30E4, 0x30E6, 0x30E8, 0x30E9,  // 0xff94 - 0xff97
  0x30EA, 0x30EB, 0x30EC, 0x30ED,  // 0xff98 - 0xff9b
  0x30EF, 0x30F3, 0x309B, 0x309C   // 0xff9c - 0xff9f
};
// Offsets added to the basic full-width value when the half-width character
// is followed by a voiced (Nigori) or semi-voiced (Maru) sound mark.
#define NIGORI_MODIFIER 1
#define MARU_MODIFIER 2

// Is the code unit inside the half-width Katakana range 0xff61..0xff9f ?
#define IS_HANKAKU(u) (((u) >= 0xff61) && ((u) <= 0xff9f))

// Is the code unit the half-width Nigori mark / the half-width Maru mark ?
#define IS_NIGORI(u) ((u) == 0xff9e)
#define IS_MARU(u) ((u) == 0xff9f)

// Can this half-width character be combined with a following Maru mark ?
#define NEED_TO_CHECK_MARU(u) (((u) >= 0xff8a) && ((u) <= 0xff8e))

// Can this half-width character be combined with a following Nigori mark ?
// Every character that accepts Maru accepts Nigori as well.
#define NEED_TO_CHECK_NIGORI(u) ((((u) >= 0xff76) && ((u) <= 0xff84)) || NEED_TO_CHECK_MARU(u))
// Mapping table used for the ASCII character set (table index 0).
// The words are handed to nsUnicodeEncodeHelper::ConvertByTable through a
// cast to uMappingTable, so they follow that binary layout.
// NOTE(review): presumably a single range mapping 0x0000..0x007F onto
// itself - confirm against the uMappingTable format description.
static const uint16_t g_ufAsciiMapping [] = {
  0x0001,
  0x0004,
  0x0005,
  0x0008,
  0x0000,
  0x0000,
  0x007F,
  0x0000
};
52 #define SIZE_OF_ISO2022JP_TABLES 5
53 static const uint16_t * g_ufMappingTables[SIZE_OF_ISO2022JP_TABLES] = {
54 g_ufAsciiMapping, // ASCII ISOREG 6
55 g_uf0201GLMapping, // JIS X 0201-1976 ISOREG 14
56 g_uf0208Mapping, // JIS X 0208-1983 ISOREG 87
57 g_uf0208extMapping, // JIS X 0208 - cp932 ext
58 g_uf0208Mapping, // JIS X 0208-1978 ISOREG 42
59 };
61 static const uScanClassID g_ufScanClassIDs[SIZE_OF_ISO2022JP_TABLES] = {
62 u1ByteCharset, // ASCII ISOREG 6
63 u1ByteCharset, // JIS X 0201-1976 ISOREG 14
64 u2BytesCharset, // JIS X 0208-1983 ISOREG 87
65 u2BytesCharset, // JIS X 0208- cp932 ext
66 u2BytesCharset, // JIS X 0208-1978 ISOREG 42
67 };
68 #define JIS_X_208_INDEX 2
70 //----------------------------------------------------------------------
71 // Class nsUnicodeToISO2022JP [implementation]
73 // worst case max length:
74 // 1 2 3 4 5 6 7 8
75 // ESC $ B XX XX ESC ( B
76 nsUnicodeToISO2022JP::nsUnicodeToISO2022JP()
77 : nsEncoderSupport(8)
78 {
79 Reset();
80 }
82 nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP()
83 {
84 }
86 nsresult nsUnicodeToISO2022JP::ChangeCharset(int32_t aCharset,
87 char * aDest,
88 int32_t * aDestLength)
89 {
90 // both 2 and 3 generate the same escape sequence. 2 is for
91 // the standard JISx0208 table, and 3 is for theCP932 extensions
92 // therefore, we treat them as the same one.
93 if(((2 == aCharset) && ( 3 == mCharset)) ||
94 ((3 == aCharset) && ( 2 == mCharset)) )
95 {
96 mCharset = aCharset;
97 }
99 if(aCharset == mCharset)
100 {
101 *aDestLength = 0;
102 return NS_OK;
103 }
105 if (*aDestLength < 3) {
106 *aDestLength = 0;
107 return NS_OK_UENC_MOREOUTPUT;
108 }
110 switch (aCharset) {
111 case 0: // ASCII ISOREG 6
112 aDest[0] = 0x1b;
113 aDest[1] = '(';
114 aDest[2] = 'B';
115 break;
116 case 1: // JIS X 0201-1976 ("Roman" set) ISOREG 14
117 aDest[0] = 0x1b;
118 aDest[1] = '(';
119 aDest[2] = 'J';
120 break;
121 case 2: // JIS X 0208-1983 ISOREG 87
122 case 3: // JIS X 0208-1983
123 // we currently use this for CP932 ext
124 aDest[0] = 0x1b;
125 aDest[1] = '$';
126 aDest[2] = 'B';
127 break;
128 case 4: // JIS X 0201-1978 ISOREG 87-
129 // we currently do not have a diff mapping for it.
130 aDest[0] = 0x1b;
131 aDest[1] = '$';
132 aDest[2] = '@';
133 break;
134 }
136 mCharset = aCharset;
137 *aDestLength = 3;
138 return NS_OK;
139 }
141 nsresult nsUnicodeToISO2022JP::ConvertHankaku(const char16_t * aSrc,
142 int32_t * aSrcLength,
143 char * aDest,
144 int32_t * aDestLength)
145 {
146 nsresult res = NS_OK;
148 const char16_t * src = aSrc;
149 const char16_t * srcEnd = aSrc + *aSrcLength;
150 char * dest = aDest;
151 char * destEnd = aDest + *aDestLength;
152 char16_t srcChar, tempChar;
153 int32_t bcr, bcw;
155 bcw = destEnd - dest;
156 res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw);
157 dest += bcw;
158 if (res != NS_OK) {
159 return res;
160 }
162 while (src < srcEnd) {
163 srcChar = *src;
164 if (!IS_HANKAKU(srcChar)) {
165 break;
166 }
167 ++src;
168 tempChar = gBasicMapping[(srcChar) - 0xff60];
170 if (src < srcEnd) {
171 // if the character could take a modifier, and the next char
172 // is a modifier, modify it and eat one char16_t
173 if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) {
174 tempChar += NIGORI_MODIFIER;
175 ++src;
176 } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) {
177 tempChar += MARU_MODIFIER;
178 ++src;
179 }
180 }
181 bcr = 1;
182 bcw = destEnd - dest;
183 res = nsUnicodeEncodeHelper::ConvertByTable(
184 &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX],
185 nullptr, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]);
186 dest += bcw;
187 if (res != NS_OK)
188 break;
189 }
190 *aDestLength = dest - aDest;
191 *aSrcLength = src - aSrc;
192 return res;
193 }
195 //----------------------------------------------------------------------
// Subclassing of nsEncoderSupport class [implementation]
198 NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr(
199 const char16_t * aSrc,
200 int32_t * aSrcLength,
201 char * aDest,
202 int32_t * aDestLength)
203 {
204 nsresult res = NS_OK;
206 const char16_t * src = aSrc;
207 const char16_t * srcEnd = aSrc + *aSrcLength;
208 char * dest = aDest;
209 char * destEnd = aDest + *aDestLength;
210 int32_t bcr, bcw;
211 int32_t i;
213 while (src < srcEnd) {
214 for (i=0; i< SIZE_OF_ISO2022JP_TABLES ; i++) {
215 bcr = 1;
216 bcw = destEnd - dest;
217 res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw,
218 g_ufScanClassIDs[i], nullptr,
219 (uMappingTable *) g_ufMappingTables[i]);
220 if (res != NS_ERROR_UENC_NOMAPPING) break;
221 }
223 if ( i>= SIZE_OF_ISO2022JP_TABLES) {
224 if (IS_HANKAKU(*src)) {
225 bcr = srcEnd - src;
226 bcw = destEnd - dest;
227 res = ConvertHankaku(src, &bcr, dest, &bcw);
228 dest += bcw;
229 src += bcr;
230 if (res == NS_OK) continue;
231 } else {
232 res = NS_ERROR_UENC_NOMAPPING;
233 src++;
234 }
235 }
236 if (res != NS_OK) break;
238 bcw = destEnd - dest;
239 res = ChangeCharset(i, dest, &bcw);
240 dest += bcw;
241 if (res != NS_OK) break;
243 bcr = srcEnd - src;
244 bcw = destEnd - dest;
245 res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw,
246 g_ufScanClassIDs[i], nullptr,
247 (uMappingTable *) g_ufMappingTables[i]);
248 src += bcr;
249 dest += bcw;
251 if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break;
252 if (res == NS_ERROR_UENC_NOMAPPING) src--;
253 }
255 *aSrcLength = src - aSrc;
256 *aDestLength = dest - aDest;
257 return res;
258 }
260 NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest,
261 int32_t * aDestLength)
262 {
263 ChangeCharset(0, aDest, aDestLength);
264 return NS_OK;
265 }
267 NS_IMETHODIMP nsUnicodeToISO2022JP::Reset()
268 {
269 mCharset = 0;
270 return nsEncoderSupport::Reset();
271 }