|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 #include "nsUnicodeToISO2022JP.h" |
|
7 #include "nsUCVJADll.h" |
|
8 #include "nsUnicodeEncodeHelper.h" |
|
9 |
|
10 //---------------------------------------------------------------------- |
|
11 // Global functions and data [declaration] |
|
12 |
|
// Half-width (Hankaku) to full-width (Zenkaku) lookup table.
// Entry i holds the replacement for the code unit 0xff60 + i; callers index
// it with (character - 0xff60).  Nigori and Maru marks that follow a base
// character are not handled here: the caller adds a modifier to the value
// read from this table.
static const char16_t gBasicMapping[0x40] = {
  /* 0xff60 */ 0xff60, 0x3002, 0x300c, 0x300d,
  /* 0xff64 */ 0x3001, 0x30fb, 0x30f2, 0x30a1,
  /* 0xff68 */ 0x30a3, 0x30a5, 0x30a7, 0x30a9,
  /* 0xff6c */ 0x30e3, 0x30e5, 0x30e7, 0x30c3,
  /* 0xff70 */ 0x30fc, 0x30a2, 0x30a4, 0x30a6,
  /* 0xff74 */ 0x30a8, 0x30aa, 0x30ab, 0x30ad,
  /* 0xff78 */ 0x30af, 0x30b1, 0x30b3, 0x30b5,
  /* 0xff7c */ 0x30b7, 0x30b9, 0x30bb, 0x30bd,
  /* 0xff80 */ 0x30bf, 0x30c1, 0x30c4, 0x30c6,
  /* 0xff84 */ 0x30c8, 0x30ca, 0x30cb, 0x30cc,
  /* 0xff88 */ 0x30cd, 0x30ce, 0x30cf, 0x30d2,
  /* 0xff8c */ 0x30d5, 0x30d8, 0x30db, 0x30de,
  /* 0xff90 */ 0x30df, 0x30e0, 0x30e1, 0x30e2,
  /* 0xff94 */ 0x30e4, 0x30e6, 0x30e8, 0x30e9,
  /* 0xff98 */ 0x30ea, 0x30eb, 0x30ec, 0x30ed,
  /* 0xff9c */ 0x30ef, 0x30f3, 0x309b, 0x309c
};
|
34 |
|
// True when the code unit following `u` has to be checked for a Nigori mark,
// i.e. `u` lies in 0xff76..0xff84 or in 0xff8a..0xff8e.
#define NEED_TO_CHECK_NIGORI(u) \
  ((((u) >= 0xff76) && ((u) <= 0xff84)) || (((u) >= 0xff8a) && ((u) <= 0xff8e)))

// True when the code unit following `u` has to be checked for a Maru mark,
// i.e. `u` lies in 0xff8a..0xff8e.
#define NEED_TO_CHECK_MARU(u) (((u) >= 0xff8a) && ((u) <= 0xff8e))

// True when `u` belongs to the Hankaku (half-width) Katakana block,
// 0xff61..0xff9f inclusive.
#define IS_HANKAKU(u) (((u) >= 0xff61) && ((u) <= 0xff9f))

// The two half-width modifier marks themselves.
#define IS_NIGORI(u) ((u) == 0xff9e)
#define IS_MARU(u) ((u) == 0xff9f)

// Offsets added to the value read from gBasicMapping when the base character
// is followed by the corresponding mark.
#define NIGORI_MODIFIER 1
#define MARU_MODIFIER 2
|
47 |
|
// Raw mapping data for the ASCII slot (index 0) of g_ufMappingTables.  The
// words are handed to nsUnicodeEncodeHelper::ConvertByTable through a cast to
// uMappingTable; the layout itself is defined outside this file.
// NOTE(review): looks like a four-word header followed by one format word and
// a single range entry covering 0x0000..0x007F -- confirm against the
// uMappingTable definition.
static const uint16_t g_ufAsciiMapping[] = {
  0x0001, 0x0004, 0x0005, 0x0008,
  0x0000, 0x0000, 0x007F, 0x0000
};
|
51 |
|
52 #define SIZE_OF_ISO2022JP_TABLES 5 |
|
53 static const uint16_t * g_ufMappingTables[SIZE_OF_ISO2022JP_TABLES] = { |
|
54 g_ufAsciiMapping, // ASCII ISOREG 6 |
|
55 g_uf0201GLMapping, // JIS X 0201-1976 ISOREG 14 |
|
56 g_uf0208Mapping, // JIS X 0208-1983 ISOREG 87 |
|
57 g_uf0208extMapping, // JIS X 0208 - cp932 ext |
|
58 g_uf0208Mapping, // JIS X 0208-1978 ISOREG 42 |
|
59 }; |
|
60 |
|
61 static const uScanClassID g_ufScanClassIDs[SIZE_OF_ISO2022JP_TABLES] = { |
|
62 u1ByteCharset, // ASCII ISOREG 6 |
|
63 u1ByteCharset, // JIS X 0201-1976 ISOREG 14 |
|
64 u2BytesCharset, // JIS X 0208-1983 ISOREG 87 |
|
65 u2BytesCharset, // JIS X 0208- cp932 ext |
|
66 u2BytesCharset, // JIS X 0208-1978 ISOREG 42 |
|
67 }; |
|
68 #define JIS_X_208_INDEX 2 |
|
69 |
|
70 //---------------------------------------------------------------------- |
|
71 // Class nsUnicodeToISO2022JP [implementation] |
|
72 |
|
73 // worst case max length: |
|
74 // 1 2 3 4 5 6 7 8 |
|
75 // ESC $ B XX XX ESC ( B |
|
76 nsUnicodeToISO2022JP::nsUnicodeToISO2022JP() |
|
77 : nsEncoderSupport(8) |
|
78 { |
|
79 Reset(); |
|
80 } |
|
81 |
|
82 nsUnicodeToISO2022JP::~nsUnicodeToISO2022JP() |
|
83 { |
|
84 } |
|
85 |
|
86 nsresult nsUnicodeToISO2022JP::ChangeCharset(int32_t aCharset, |
|
87 char * aDest, |
|
88 int32_t * aDestLength) |
|
89 { |
|
90 // both 2 and 3 generate the same escape sequence. 2 is for |
|
91 // the standard JISx0208 table, and 3 is for theCP932 extensions |
|
92 // therefore, we treat them as the same one. |
|
93 if(((2 == aCharset) && ( 3 == mCharset)) || |
|
94 ((3 == aCharset) && ( 2 == mCharset)) ) |
|
95 { |
|
96 mCharset = aCharset; |
|
97 } |
|
98 |
|
99 if(aCharset == mCharset) |
|
100 { |
|
101 *aDestLength = 0; |
|
102 return NS_OK; |
|
103 } |
|
104 |
|
105 if (*aDestLength < 3) { |
|
106 *aDestLength = 0; |
|
107 return NS_OK_UENC_MOREOUTPUT; |
|
108 } |
|
109 |
|
110 switch (aCharset) { |
|
111 case 0: // ASCII ISOREG 6 |
|
112 aDest[0] = 0x1b; |
|
113 aDest[1] = '('; |
|
114 aDest[2] = 'B'; |
|
115 break; |
|
116 case 1: // JIS X 0201-1976 ("Roman" set) ISOREG 14 |
|
117 aDest[0] = 0x1b; |
|
118 aDest[1] = '('; |
|
119 aDest[2] = 'J'; |
|
120 break; |
|
121 case 2: // JIS X 0208-1983 ISOREG 87 |
|
122 case 3: // JIS X 0208-1983 |
|
123 // we currently use this for CP932 ext |
|
124 aDest[0] = 0x1b; |
|
125 aDest[1] = '$'; |
|
126 aDest[2] = 'B'; |
|
127 break; |
|
128 case 4: // JIS X 0201-1978 ISOREG 87- |
|
129 // we currently do not have a diff mapping for it. |
|
130 aDest[0] = 0x1b; |
|
131 aDest[1] = '$'; |
|
132 aDest[2] = '@'; |
|
133 break; |
|
134 } |
|
135 |
|
136 mCharset = aCharset; |
|
137 *aDestLength = 3; |
|
138 return NS_OK; |
|
139 } |
|
140 |
|
141 nsresult nsUnicodeToISO2022JP::ConvertHankaku(const char16_t * aSrc, |
|
142 int32_t * aSrcLength, |
|
143 char * aDest, |
|
144 int32_t * aDestLength) |
|
145 { |
|
146 nsresult res = NS_OK; |
|
147 |
|
148 const char16_t * src = aSrc; |
|
149 const char16_t * srcEnd = aSrc + *aSrcLength; |
|
150 char * dest = aDest; |
|
151 char * destEnd = aDest + *aDestLength; |
|
152 char16_t srcChar, tempChar; |
|
153 int32_t bcr, bcw; |
|
154 |
|
155 bcw = destEnd - dest; |
|
156 res = ChangeCharset(JIS_X_208_INDEX, dest, &bcw); |
|
157 dest += bcw; |
|
158 if (res != NS_OK) { |
|
159 return res; |
|
160 } |
|
161 |
|
162 while (src < srcEnd) { |
|
163 srcChar = *src; |
|
164 if (!IS_HANKAKU(srcChar)) { |
|
165 break; |
|
166 } |
|
167 ++src; |
|
168 tempChar = gBasicMapping[(srcChar) - 0xff60]; |
|
169 |
|
170 if (src < srcEnd) { |
|
171 // if the character could take a modifier, and the next char |
|
172 // is a modifier, modify it and eat one char16_t |
|
173 if (NEED_TO_CHECK_NIGORI(srcChar) && IS_NIGORI(*src)) { |
|
174 tempChar += NIGORI_MODIFIER; |
|
175 ++src; |
|
176 } else if (NEED_TO_CHECK_MARU(srcChar) && IS_MARU(*src)) { |
|
177 tempChar += MARU_MODIFIER; |
|
178 ++src; |
|
179 } |
|
180 } |
|
181 bcr = 1; |
|
182 bcw = destEnd - dest; |
|
183 res = nsUnicodeEncodeHelper::ConvertByTable( |
|
184 &tempChar, &bcr, dest, &bcw, g_ufScanClassIDs[JIS_X_208_INDEX], |
|
185 nullptr, (uMappingTable *) g_ufMappingTables[JIS_X_208_INDEX]); |
|
186 dest += bcw; |
|
187 if (res != NS_OK) |
|
188 break; |
|
189 } |
|
190 *aDestLength = dest - aDest; |
|
191 *aSrcLength = src - aSrc; |
|
192 return res; |
|
193 } |
|
194 |
|
195 //---------------------------------------------------------------------- |
|
196 // Subclassing of nsTableEncoderSupport class [implementation] |
|
197 |
|
198 NS_IMETHODIMP nsUnicodeToISO2022JP::ConvertNoBuffNoErr( |
|
199 const char16_t * aSrc, |
|
200 int32_t * aSrcLength, |
|
201 char * aDest, |
|
202 int32_t * aDestLength) |
|
203 { |
|
204 nsresult res = NS_OK; |
|
205 |
|
206 const char16_t * src = aSrc; |
|
207 const char16_t * srcEnd = aSrc + *aSrcLength; |
|
208 char * dest = aDest; |
|
209 char * destEnd = aDest + *aDestLength; |
|
210 int32_t bcr, bcw; |
|
211 int32_t i; |
|
212 |
|
213 while (src < srcEnd) { |
|
214 for (i=0; i< SIZE_OF_ISO2022JP_TABLES ; i++) { |
|
215 bcr = 1; |
|
216 bcw = destEnd - dest; |
|
217 res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, |
|
218 g_ufScanClassIDs[i], nullptr, |
|
219 (uMappingTable *) g_ufMappingTables[i]); |
|
220 if (res != NS_ERROR_UENC_NOMAPPING) break; |
|
221 } |
|
222 |
|
223 if ( i>= SIZE_OF_ISO2022JP_TABLES) { |
|
224 if (IS_HANKAKU(*src)) { |
|
225 bcr = srcEnd - src; |
|
226 bcw = destEnd - dest; |
|
227 res = ConvertHankaku(src, &bcr, dest, &bcw); |
|
228 dest += bcw; |
|
229 src += bcr; |
|
230 if (res == NS_OK) continue; |
|
231 } else { |
|
232 res = NS_ERROR_UENC_NOMAPPING; |
|
233 src++; |
|
234 } |
|
235 } |
|
236 if (res != NS_OK) break; |
|
237 |
|
238 bcw = destEnd - dest; |
|
239 res = ChangeCharset(i, dest, &bcw); |
|
240 dest += bcw; |
|
241 if (res != NS_OK) break; |
|
242 |
|
243 bcr = srcEnd - src; |
|
244 bcw = destEnd - dest; |
|
245 res = nsUnicodeEncodeHelper::ConvertByTable(src, &bcr, dest, &bcw, |
|
246 g_ufScanClassIDs[i], nullptr, |
|
247 (uMappingTable *) g_ufMappingTables[i]); |
|
248 src += bcr; |
|
249 dest += bcw; |
|
250 |
|
251 if ((res != NS_OK) && (res != NS_ERROR_UENC_NOMAPPING)) break; |
|
252 if (res == NS_ERROR_UENC_NOMAPPING) src--; |
|
253 } |
|
254 |
|
255 *aSrcLength = src - aSrc; |
|
256 *aDestLength = dest - aDest; |
|
257 return res; |
|
258 } |
|
259 |
|
260 NS_IMETHODIMP nsUnicodeToISO2022JP::FinishNoBuff(char * aDest, |
|
261 int32_t * aDestLength) |
|
262 { |
|
263 ChangeCharset(0, aDest, aDestLength); |
|
264 return NS_OK; |
|
265 } |
|
266 |
|
267 NS_IMETHODIMP nsUnicodeToISO2022JP::Reset() |
|
268 { |
|
269 mCharset = 0; |
|
270 return nsEncoderSupport::Reset(); |
|
271 } |