|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 |
|
8 #include "prmem.h" |
|
9 #include "prprf.h" |
|
10 #include "nsICharsetConverterManager.h" |
|
11 #include "nsSaveAsCharset.h" |
|
12 #include "nsWhitespaceTokenizer.h" |
|
13 #include "nsServiceManagerUtils.h" |
|
14 |
|
//
// nsISupports methods
//
// The macro below is invoked with nsSaveAsCharset as the implementing class
// and nsISaveAsCharset as the single interface listed for it.
NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset)
|
19 |
|
20 // |
|
21 // nsSaveAsCharset |
|
22 // |
|
23 nsSaveAsCharset::nsSaveAsCharset() |
|
24 { |
|
25 mAttribute = attr_htmlTextDefault; |
|
26 mEntityVersion = 0; |
|
27 mCharsetListIndex = -1; |
|
28 } |
|
29 |
|
30 nsSaveAsCharset::~nsSaveAsCharset() |
|
31 { |
|
32 } |
|
33 |
|
34 NS_IMETHODIMP |
|
35 nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion) |
|
36 { |
|
37 nsresult rv = NS_OK; |
|
38 |
|
39 mAttribute = attr; |
|
40 mEntityVersion = entityVersion; |
|
41 |
|
42 rv = SetupCharsetList(charset); |
|
43 NS_ENSURE_SUCCESS(rv, rv); |
|
44 |
|
45 // set up unicode encoder |
|
46 rv = SetupUnicodeEncoder(GetNextCharset()); |
|
47 NS_ENSURE_SUCCESS(rv, rv); |
|
48 |
|
49 // set up entity converter |
|
50 if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter) |
|
51 mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv); |
|
52 |
|
53 return rv; |
|
54 } |
|
55 |
|
56 NS_IMETHODIMP |
|
57 nsSaveAsCharset::Convert(const char16_t *inString, char **_retval) |
|
58 { |
|
59 NS_ENSURE_ARG_POINTER(_retval); |
|
60 NS_ENSURE_ARG_POINTER(inString); |
|
61 if (0 == *inString) |
|
62 return NS_ERROR_ILLEGAL_VALUE; |
|
63 nsresult rv = NS_OK; |
|
64 |
|
65 NS_ASSERTION(mEncoder, "need to call Init() before Convert()"); |
|
66 NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE); |
|
67 |
|
68 *_retval = nullptr; |
|
69 |
|
70 // make sure to start from the first charset in the list |
|
71 if (mCharsetListIndex > 0) { |
|
72 mCharsetListIndex = -1; |
|
73 rv = SetupUnicodeEncoder(GetNextCharset()); |
|
74 NS_ENSURE_SUCCESS(rv, rv); |
|
75 } |
|
76 |
|
77 do { |
|
78 // fallback to the next charset in the list if the last conversion failed by an unmapped character |
|
79 if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) { |
|
80 const char * charset = GetNextCharset(); |
|
81 if (!charset) |
|
82 break; |
|
83 rv = SetupUnicodeEncoder(charset); |
|
84 NS_ENSURE_SUCCESS(rv, rv); |
|
85 PR_FREEIF(*_retval); |
|
86 } |
|
87 |
|
88 if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) { |
|
89 NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()"); |
|
90 NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE); |
|
91 char16_t *entity = nullptr; |
|
92 // do the entity conversion first |
|
93 rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity); |
|
94 if(NS_SUCCEEDED(rv)) { |
|
95 rv = DoCharsetConversion(entity, _retval); |
|
96 nsMemory::Free(entity); |
|
97 } |
|
98 } |
|
99 else |
|
100 rv = DoCharsetConversion(inString, _retval); |
|
101 |
|
102 } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv); |
|
103 |
|
104 return rv; |
|
105 } |
|
106 |
|
107 NS_IMETHODIMP |
|
108 nsSaveAsCharset::GetCharset(char * *aCharset) |
|
109 { |
|
110 NS_ENSURE_ARG(aCharset); |
|
111 NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first"); |
|
112 NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE); |
|
113 |
|
114 const char* charset = mCharsetList[mCharsetListIndex].get(); |
|
115 if (!charset) { |
|
116 *aCharset = nullptr; |
|
117 NS_ASSERTION(charset, "make sure to call Init() with non empty charset list"); |
|
118 return NS_ERROR_FAILURE; |
|
119 } |
|
120 |
|
121 *aCharset = strdup(charset); |
|
122 return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY; |
|
123 } |
|
124 |
|
125 ///////////////////////////////////////////////////////////////////////////////////////// |
|
126 |
|
127 #define RESERVE_FALLBACK_BYTES 512 |
|
128 |
|
129 // do the fallback, reallocate the buffer if necessary |
|
130 // need to pass destination buffer info (size, current position and estimation of rest of the conversion) |
|
131 NS_IMETHODIMP |
|
132 nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength, |
|
133 int32_t *currentPos, int32_t estimatedLength) |
|
134 { |
|
135 NS_ENSURE_ARG_POINTER(outString); |
|
136 NS_ENSURE_ARG_POINTER(bufferLength); |
|
137 NS_ENSURE_ARG_POINTER(currentPos); |
|
138 |
|
139 char fallbackStr[256]; |
|
140 nsresult rv = DoConversionFallBack(character, fallbackStr, 256); |
|
141 if (NS_SUCCEEDED(rv)) { |
|
142 int32_t tempLen = (int32_t) strlen(fallbackStr); |
|
143 |
|
144 // reallocate if the buffer is not large enough |
|
145 if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) { |
|
146 int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES; |
|
147 // + 1 is for the terminating NUL, don't add that to bufferLength |
|
148 char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1); |
|
149 if (temp) { |
|
150 // adjust length/pointer after realloc |
|
151 *bufferLength += addLength; |
|
152 *outString = temp; |
|
153 } else { |
|
154 *outString = nullptr; |
|
155 *bufferLength = 0; |
|
156 return NS_ERROR_OUT_OF_MEMORY; |
|
157 } |
|
158 } |
|
159 memcpy((*outString + *currentPos), fallbackStr, tempLen); |
|
160 *currentPos += tempLen; |
|
161 } |
|
162 return rv; |
|
163 } |
|
164 |
|
165 NS_IMETHODIMP |
|
166 nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString) |
|
167 { |
|
168 NS_ENSURE_ARG_POINTER(outString); |
|
169 |
|
170 *outString = nullptr; |
|
171 |
|
172 nsresult rv; |
|
173 int32_t inStringLength = NS_strlen(inString); // original input string length |
|
174 int32_t bufferLength; // allocated buffer length |
|
175 int32_t srcLength = inStringLength; |
|
176 int32_t dstLength; |
|
177 int32_t pos1, pos2; |
|
178 nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING |
|
179 |
|
180 // estimate and allocate the target buffer (reserve extra memory for fallback) |
|
181 rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength); |
|
182 if (NS_FAILED(rv)) return rv; |
|
183 |
|
184 bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback |
|
185 // + 1 is for the terminating NUL -- we don't add that to bufferLength so that |
|
186 // we can always write dstPtr[pos2] = '\0' even when the encoder filled the |
|
187 // buffer. |
|
188 char *dstPtr = (char *) PR_Malloc(bufferLength + 1); |
|
189 if (!dstPtr) { |
|
190 return NS_ERROR_OUT_OF_MEMORY; |
|
191 } |
|
192 |
|
193 for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) { |
|
194 // convert from unicode |
|
195 dstLength = bufferLength - pos2; |
|
196 NS_ASSERTION(dstLength >= 0, "out of bounds write"); |
|
197 rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength); |
|
198 |
|
199 pos1 += srcLength ? srcLength : 1; |
|
200 pos2 += dstLength; |
|
201 dstPtr[pos2] = '\0'; |
|
202 |
|
203 // break: this is usually the case (no error) OR unrecoverable error |
|
204 if (NS_ERROR_UENC_NOMAPPING != rv) break; |
|
205 |
|
206 // remember this happened and reset the result |
|
207 saveResult = rv; |
|
208 rv = NS_OK; |
|
209 |
|
210 // finish encoder, give it a chance to write extra data like escape sequences |
|
211 dstLength = bufferLength - pos2; |
|
212 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); |
|
213 if (NS_SUCCEEDED(rv)) { |
|
214 pos2 += dstLength; |
|
215 dstPtr[pos2] = '\0'; |
|
216 } |
|
217 |
|
218 srcLength = inStringLength - pos1; |
|
219 |
|
220 // do the fallback |
|
221 if (!ATTR_NO_FALLBACK(mAttribute)) { |
|
222 uint32_t unMappedChar; |
|
223 if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) && |
|
224 inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) { |
|
225 unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]); |
|
226 pos1++; |
|
227 } else { |
|
228 unMappedChar = inString[pos1-1]; |
|
229 } |
|
230 |
|
231 rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength); |
|
232 if (NS_FAILED(rv)) |
|
233 break; |
|
234 |
|
235 rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength); |
|
236 if (NS_FAILED(rv)) |
|
237 break; |
|
238 dstPtr[pos2] = '\0'; |
|
239 } |
|
240 } |
|
241 |
|
242 if (NS_SUCCEEDED(rv)) { |
|
243 // finish encoder, give it a chance to write extra data like escape sequences |
|
244 dstLength = bufferLength - pos2; |
|
245 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength); |
|
246 if (NS_SUCCEEDED(rv)) { |
|
247 pos2 += dstLength; |
|
248 dstPtr[pos2] = '\0'; |
|
249 } |
|
250 } |
|
251 |
|
252 if (NS_FAILED(rv)) { |
|
253 PR_FREEIF(dstPtr); |
|
254 return rv; |
|
255 } |
|
256 |
|
257 *outString = dstPtr; // set the result string |
|
258 |
|
259 // set error code so that the caller can do own fall back |
|
260 if (NS_ERROR_UENC_NOMAPPING == saveResult) { |
|
261 rv = NS_ERROR_UENC_NOMAPPING; |
|
262 } |
|
263 |
|
264 return rv; |
|
265 } |
|
266 |
|
267 NS_IMETHODIMP |
|
268 nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength) |
|
269 { |
|
270 NS_ENSURE_ARG_POINTER(outString); |
|
271 |
|
272 *outString = '\0'; |
|
273 |
|
274 nsresult rv = NS_OK; |
|
275 |
|
276 if (ATTR_NO_FALLBACK(mAttribute)) { |
|
277 return NS_OK; |
|
278 } |
|
279 if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) { |
|
280 char *entity = nullptr; |
|
281 rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity); |
|
282 if (NS_SUCCEEDED(rv)) { |
|
283 if (!entity || (int32_t)strlen(entity) > bufferLength) { |
|
284 return NS_ERROR_OUT_OF_MEMORY; |
|
285 } |
|
286 PL_strcpy(outString, entity); |
|
287 nsMemory::Free(entity); |
|
288 return rv; |
|
289 } |
|
290 } |
|
291 |
|
292 switch (MASK_FALLBACK(mAttribute)) { |
|
293 case attr_FallbackQuestionMark: |
|
294 if(bufferLength>=2) { |
|
295 *outString++='?'; |
|
296 *outString='\0'; |
|
297 rv = NS_OK; |
|
298 } else { |
|
299 rv = NS_ERROR_FAILURE; |
|
300 } |
|
301 break; |
|
302 case attr_FallbackEscapeU: |
|
303 if (inUCS4 & 0xff0000) |
|
304 rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
|
305 else |
|
306 rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
|
307 break; |
|
308 case attr_FallbackDecimalNCR: |
|
309 rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
|
310 break; |
|
311 case attr_FallbackHexNCR: |
|
312 rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE; |
|
313 break; |
|
314 case attr_FallbackNone: |
|
315 rv = NS_OK; |
|
316 break; |
|
317 default: |
|
318 rv = NS_ERROR_ILLEGAL_VALUE; |
|
319 break; |
|
320 } |
|
321 |
|
322 return rv; |
|
323 } |
|
324 |
|
325 nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset) |
|
326 { |
|
327 NS_ENSURE_ARG(charset); |
|
328 nsresult rv; |
|
329 |
|
330 // set up unicode encoder |
|
331 nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); |
|
332 NS_ENSURE_SUCCESS(rv, rv); |
|
333 |
|
334 return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder)); |
|
335 } |
|
336 |
|
337 nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList) |
|
338 { |
|
339 NS_ENSURE_ARG(charsetList); |
|
340 |
|
341 NS_ASSERTION(charsetList[0], "charsetList should not be empty"); |
|
342 if (!charsetList[0]) |
|
343 return NS_ERROR_INVALID_ARG; |
|
344 |
|
345 if (mCharsetListIndex >= 0) { |
|
346 mCharsetList.Clear(); |
|
347 mCharsetListIndex = -1; |
|
348 } |
|
349 |
|
350 nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList); |
|
351 while (tokenizer.hasMoreTokens()) { |
|
352 ParseString(tokenizer.nextToken(), ',', mCharsetList); |
|
353 } |
|
354 |
|
355 return NS_OK; |
|
356 } |
|
357 |
|
358 const char * nsSaveAsCharset::GetNextCharset() |
|
359 { |
|
360 if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length())) |
|
361 return nullptr; |
|
362 |
|
363 // bump the index and return the next charset |
|
364 return mCharsetList[++mCharsetListIndex].get(); |
|
365 } |