|
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 /** |
|
6 * A character set converter from Unicode to GBK. |
|
7 * |
|
8 * |
|
9 * @created 08/Sept/1999 |
|
10 * @author Yueheng Xu, Yueheng.Xu@intel.com |
|
11 * Revision History |
|
12 * 04/Oct/1999. Yueheng Xu: used table gUnicodeToGBKTable[0x5200] to make |
|
13 * Unicode to GB mapping fast |
|
14 */ |
|
15 |
|
16 #include "nsUnicodeToGBK.h" |
|
17 #include "gbku.h" |
|
18 #include "uconvutil.h" |
|
19 #include "nsCharTraits.h" |
|
20 |
|
21 //------------------------------------------------------------- |
|
22 // Global table initialization function defined in gbku.h |
|
23 //------------------------------------------------------------- |
|
24 |
|
25 //----------------------------------------------------------------------- |
|
26 // Private class used by nsUnicodeToGB18030 and nsUnicodeToGB18030Font0 |
|
27 // nsUnicodeToGB18030Uniq2Bytes |
|
28 //----------------------------------------------------------------------- |
|
29 static const uint16_t g_uf_gb18030_2bytes[] = { |
|
30 #include "gb18030uniq2b.uf" |
|
31 }; |
|
32 class nsUnicodeToGB18030Uniq2Bytes : public nsTableEncoderSupport |
|
33 { |
|
34 public: |
|
35 nsUnicodeToGB18030Uniq2Bytes() |
|
36 : nsTableEncoderSupport(u2BytesCharset, |
|
37 (uMappingTable*) &g_uf_gb18030_2bytes, 2) {} |
|
38 protected: |
|
39 }; |
|
40 //----------------------------------------------------------------------- |
|
41 // Private class used by nsUnicodeToGB18030 |
|
42 // nsUnicodeTo4BytesGB18030 |
|
43 //----------------------------------------------------------------------- |
|
44 static const uint16_t g_uf_gb18030_4bytes[] = { |
|
45 #include "gb180304bytes.uf" |
|
46 }; |
|
47 class nsUnicodeTo4BytesGB18030 : public nsTableEncoderSupport |
|
48 { |
|
49 public: |
|
50 nsUnicodeTo4BytesGB18030() |
|
51 : nsTableEncoderSupport(u4BytesGB18030Charset, |
|
52 (uMappingTable*) &g_uf_gb18030_4bytes, 4) {} |
|
53 protected: |
|
54 }; |
|
55 //----------------------------------------------------------------------- |
|
56 // Private class used by nsUnicodeToGBK |
|
57 // nsUnicodeToGBKUniq2Bytes |
|
58 //----------------------------------------------------------------------- |
|
59 static const uint16_t g_uf_gbk_2bytes[] = { |
|
60 #include "gbkuniq2b.uf" |
|
61 }; |
|
62 class nsUnicodeToGBKUniq2Bytes : public nsTableEncoderSupport |
|
63 { |
|
64 public: |
|
65 nsUnicodeToGBKUniq2Bytes() |
|
66 : nsTableEncoderSupport(u2BytesCharset, |
|
67 (uMappingTable*) &g_uf_gbk_2bytes, 2) {} |
|
68 protected: |
|
69 }; |
|
70 //----------------------------------------------------------------------- |
|
71 // nsUnicodeToGB18030 |
|
72 //----------------------------------------------------------------------- |
|
73 void nsUnicodeToGB18030::CreateExtensionEncoder() |
|
74 { |
|
75 mExtensionEncoder = new nsUnicodeToGB18030Uniq2Bytes(); |
|
76 } |
|
77 void nsUnicodeToGB18030::Create4BytesEncoder() |
|
78 { |
|
79 m4BytesEncoder = new nsUnicodeTo4BytesGB18030(); |
|
80 } |
|
81 |
|
82 bool nsUnicodeToGB18030::EncodeSurrogate( |
|
83 char16_t aSurrogateHigh, |
|
84 char16_t aSurrogateLow, |
|
85 char* aOut) |
|
86 { |
|
87 if( NS_IS_HIGH_SURROGATE(aSurrogateHigh) && |
|
88 NS_IS_LOW_SURROGATE(aSurrogateLow) ) |
|
89 { |
|
90 // notice that idx does not include the 0x10000 |
|
91 uint32_t idx = ((aSurrogateHigh - (char16_t)0xD800) << 10 ) | |
|
92 (aSurrogateLow - (char16_t) 0xDC00); |
|
93 |
|
94 unsigned char *out = (unsigned char*) aOut; |
|
95 // notice this is from 0x90 for supplment planes |
|
96 out[0] = (idx / (10*126*10)) + 0x90; |
|
97 idx %= (10*126*10); |
|
98 out[1] = (idx / (10*126)) + 0x30; |
|
99 idx %= (10*126); |
|
100 out[2] = (idx / (10)) + 0x81; |
|
101 out[3] = (idx % 10) + 0x30; |
|
102 return true; |
|
103 } |
|
104 return false; |
|
105 } |
|
106 |
|
107 //---------------------------------------------------------------------- |
|
108 // Class nsUnicodeToGBK [implementation] |
|
109 |
|
110 nsUnicodeToGBK::nsUnicodeToGBK(uint32_t aMaxLength) : |
|
111 nsEncoderSupport(aMaxLength) |
|
112 { |
|
113 mExtensionEncoder = nullptr; |
|
114 m4BytesEncoder = nullptr; |
|
115 mSurrogateHigh = 0; |
|
116 } |
|
117 void nsUnicodeToGBK::CreateExtensionEncoder() |
|
118 { |
|
119 mExtensionEncoder = new nsUnicodeToGBKUniq2Bytes(); |
|
120 } |
|
121 void nsUnicodeToGBK::Create4BytesEncoder() |
|
122 { |
|
123 m4BytesEncoder = nullptr; |
|
124 } |
|
125 bool nsUnicodeToGBK::TryExtensionEncoder( |
|
126 char16_t aChar, |
|
127 char* aOut, |
|
128 int32_t *aOutLen |
|
129 ) |
|
130 { |
|
131 if( NS_IS_HIGH_SURROGATE(aChar) || |
|
132 NS_IS_LOW_SURROGATE(aChar) ) |
|
133 { |
|
134 // performance tune for surrogate characters |
|
135 return false; |
|
136 } |
|
137 if(! mExtensionEncoder ) |
|
138 CreateExtensionEncoder(); |
|
139 if(mExtensionEncoder) |
|
140 { |
|
141 int32_t len = 1; |
|
142 nsresult res = NS_OK; |
|
143 res = mExtensionEncoder->Convert(&aChar, &len, aOut, aOutLen); |
|
144 if(NS_SUCCEEDED(res) && (*aOutLen > 0)) |
|
145 return true; |
|
146 } |
|
147 return false; |
|
148 } |
|
149 |
|
150 bool nsUnicodeToGBK::Try4BytesEncoder( |
|
151 char16_t aChar, |
|
152 char* aOut, |
|
153 int32_t *aOutLen |
|
154 ) |
|
155 { |
|
156 if( NS_IS_HIGH_SURROGATE(aChar) || |
|
157 NS_IS_LOW_SURROGATE(aChar) ) |
|
158 { |
|
159 // performance tune for surrogate characters |
|
160 return false; |
|
161 } |
|
162 if(! m4BytesEncoder ) |
|
163 Create4BytesEncoder(); |
|
164 if(m4BytesEncoder) |
|
165 { |
|
166 int32_t len = 1; |
|
167 nsresult res = NS_OK; |
|
168 res = m4BytesEncoder->Convert(&aChar, &len, aOut, aOutLen); |
|
169 NS_ASSERTION(NS_FAILED(res) || ((1 == len) && (4 == *aOutLen)), |
|
170 "unexpect conversion length"); |
|
171 if(NS_SUCCEEDED(res) && (*aOutLen > 0)) |
|
172 return true; |
|
173 } |
|
174 return false; |
|
175 } |
|
176 bool nsUnicodeToGBK::EncodeSurrogate( |
|
177 char16_t aSurrogateHigh, |
|
178 char16_t aSurrogateLow, |
|
179 char* aOut) |
|
180 { |
|
181 return false; // GBK cannot encode Surrogate, let the subclass encode it. |
|
182 } |
|
183 |
|
184 NS_IMETHODIMP nsUnicodeToGBK::ConvertNoBuff( |
|
185 const char16_t * aSrc, |
|
186 int32_t * aSrcLength, |
|
187 char * aDest, |
|
188 int32_t * aDestLength) |
|
189 { |
|
190 int32_t iSrcLength = 0; |
|
191 int32_t iDestLength = 0; |
|
192 char16_t unicode; |
|
193 nsresult res = NS_OK; |
|
194 while (iSrcLength < *aSrcLength ) |
|
195 { |
|
196 unicode = *aSrc; |
|
197 //if unicode's hi byte has something, it is not ASCII, must be a GB |
|
198 if(IS_ASCII(unicode)) |
|
199 { |
|
200 // this is an ASCII |
|
201 *aDest = CAST_UNICHAR_TO_CHAR(*aSrc); |
|
202 aDest++; // increment 1 byte |
|
203 iDestLength +=1; |
|
204 } else { |
|
205 char byte1, byte2; |
|
206 if(mUtil.UnicodeToGBKChar( unicode, false, &byte1, &byte2)) |
|
207 { |
|
208 // make sure we still have 2 bytes for output first |
|
209 if(iDestLength+2 > *aDestLength) |
|
210 { |
|
211 res = NS_OK_UENC_MOREOUTPUT; |
|
212 break; |
|
213 } |
|
214 aDest[0] = byte1; |
|
215 aDest[1] = byte2; |
|
216 aDest += 2; // increment 2 bytes |
|
217 iDestLength +=2; |
|
218 } else { |
|
219 int32_t aOutLen = 2; |
|
220 // make sure we still have 2 bytes for output first |
|
221 if(iDestLength+2 > *aDestLength) |
|
222 { |
|
223 res = NS_OK_UENC_MOREOUTPUT; |
|
224 break; |
|
225 } |
|
226 // we cannot map in the common mapping. Let's try to |
|
227 // call the delegated 2 byte converter for the gbk or gb18030 |
|
228 // unique 2 byte mapping |
|
229 if(TryExtensionEncoder(unicode, aDest, &aOutLen)) |
|
230 { |
|
231 iDestLength += aOutLen; |
|
232 aDest += aOutLen; |
|
233 } else { |
|
234 // make sure we still have 4 bytes for output first |
|
235 if(iDestLength+4 > *aDestLength) |
|
236 { |
|
237 res = NS_OK_UENC_MOREOUTPUT; |
|
238 break; |
|
239 } |
|
240 // we still cannot map. Let's try to |
|
241 // call the delegated GB18030 4 byte converter |
|
242 aOutLen = 4; |
|
243 if( NS_IS_HIGH_SURROGATE(unicode) ) |
|
244 { |
|
245 if((iSrcLength+1) < *aSrcLength ) { |
|
246 if(EncodeSurrogate(aSrc[0],aSrc[1], aDest)) { |
|
247 // since we got a surrogate pair, we need to increment src. |
|
248 iSrcLength++ ; |
|
249 aSrc++; |
|
250 iDestLength += aOutLen; |
|
251 aDest += aOutLen; |
|
252 } else { |
|
253 // only get a high surrogate, but not a low surrogate |
|
254 res = NS_ERROR_UENC_NOMAPPING; |
|
255 iSrcLength++; // include length of the unmapped character |
|
256 break; |
|
257 } |
|
258 } else { |
|
259 mSurrogateHigh = aSrc[0]; |
|
260 break; // this will go to afterwhileloop |
|
261 } |
|
262 } else { |
|
263 if( NS_IS_LOW_SURROGATE(unicode) ) |
|
264 { |
|
265 if(NS_IS_HIGH_SURROGATE(mSurrogateHigh)) { |
|
266 if(EncodeSurrogate(mSurrogateHigh, aSrc[0], aDest)) { |
|
267 iDestLength += aOutLen; |
|
268 aDest += aOutLen; |
|
269 } else { |
|
270 // only get a high surrogate, but not a low surrogate |
|
271 res = NS_ERROR_UENC_NOMAPPING; |
|
272 iSrcLength++; // include length of the unmapped character |
|
273 break; |
|
274 } |
|
275 } else { |
|
276 // only get a low surrogate, but not a low surrogate |
|
277 res = NS_ERROR_UENC_NOMAPPING; |
|
278 iSrcLength++; // include length of the unmapped character |
|
279 break; |
|
280 } |
|
281 } else { |
|
282 if(Try4BytesEncoder(unicode, aDest, &aOutLen)) |
|
283 { |
|
284 NS_ASSERTION((aOutLen == 4), "we should always generate 4 bytes here"); |
|
285 iDestLength += aOutLen; |
|
286 aDest += aOutLen; |
|
287 } else { |
|
288 res = NS_ERROR_UENC_NOMAPPING; |
|
289 iSrcLength++; // include length of the unmapped character |
|
290 break; |
|
291 } |
|
292 } |
|
293 } |
|
294 } |
|
295 } |
|
296 } |
|
297 iSrcLength++ ; // Each unicode char just count as one in char16_t string; |
|
298 mSurrogateHigh = 0; |
|
299 aSrc++; |
|
300 if ( iDestLength >= (*aDestLength) && (iSrcLength < *aSrcLength) ) |
|
301 { |
|
302 res = NS_OK_UENC_MOREOUTPUT; |
|
303 break; |
|
304 } |
|
305 } |
|
306 //afterwhileloop: |
|
307 *aDestLength = iDestLength; |
|
308 *aSrcLength = iSrcLength; |
|
309 return res; |
|
310 } |