Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 #include "prmem.h"
9 #include "prprf.h"
10 #include "nsICharsetConverterManager.h"
11 #include "nsSaveAsCharset.h"
12 #include "nsWhitespaceTokenizer.h"
13 #include "nsServiceManagerUtils.h"
15 //
16 // nsISupports methods
17 //
18 NS_IMPL_ISUPPORTS(nsSaveAsCharset, nsISaveAsCharset)
20 //
21 // nsSaveAsCharset
22 //
23 nsSaveAsCharset::nsSaveAsCharset()
24 {
25 mAttribute = attr_htmlTextDefault;
26 mEntityVersion = 0;
27 mCharsetListIndex = -1;
28 }
30 nsSaveAsCharset::~nsSaveAsCharset()
31 {
32 }
34 NS_IMETHODIMP
35 nsSaveAsCharset::Init(const char *charset, uint32_t attr, uint32_t entityVersion)
36 {
37 nsresult rv = NS_OK;
39 mAttribute = attr;
40 mEntityVersion = entityVersion;
42 rv = SetupCharsetList(charset);
43 NS_ENSURE_SUCCESS(rv, rv);
45 // set up unicode encoder
46 rv = SetupUnicodeEncoder(GetNextCharset());
47 NS_ENSURE_SUCCESS(rv, rv);
49 // set up entity converter
50 if (attr_EntityNone != MASK_ENTITY(mAttribute) && !mEntityConverter)
51 mEntityConverter = do_CreateInstance(NS_ENTITYCONVERTER_CONTRACTID, &rv);
53 return rv;
54 }
56 NS_IMETHODIMP
57 nsSaveAsCharset::Convert(const char16_t *inString, char **_retval)
58 {
59 NS_ENSURE_ARG_POINTER(_retval);
60 NS_ENSURE_ARG_POINTER(inString);
61 if (0 == *inString)
62 return NS_ERROR_ILLEGAL_VALUE;
63 nsresult rv = NS_OK;
65 NS_ASSERTION(mEncoder, "need to call Init() before Convert()");
66 NS_ENSURE_TRUE(mEncoder, NS_ERROR_FAILURE);
68 *_retval = nullptr;
70 // make sure to start from the first charset in the list
71 if (mCharsetListIndex > 0) {
72 mCharsetListIndex = -1;
73 rv = SetupUnicodeEncoder(GetNextCharset());
74 NS_ENSURE_SUCCESS(rv, rv);
75 }
77 do {
78 // fallback to the next charset in the list if the last conversion failed by an unmapped character
79 if (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv) {
80 const char * charset = GetNextCharset();
81 if (!charset)
82 break;
83 rv = SetupUnicodeEncoder(charset);
84 NS_ENSURE_SUCCESS(rv, rv);
85 PR_FREEIF(*_retval);
86 }
88 if (attr_EntityBeforeCharsetConv == MASK_ENTITY(mAttribute)) {
89 NS_ASSERTION(mEntityConverter, "need to call Init() before Convert()");
90 NS_ENSURE_TRUE(mEntityConverter, NS_ERROR_FAILURE);
91 char16_t *entity = nullptr;
92 // do the entity conversion first
93 rv = mEntityConverter->ConvertToEntities(inString, mEntityVersion, &entity);
94 if(NS_SUCCEEDED(rv)) {
95 rv = DoCharsetConversion(entity, _retval);
96 nsMemory::Free(entity);
97 }
98 }
99 else
100 rv = DoCharsetConversion(inString, _retval);
102 } while (MASK_CHARSET_FALLBACK(mAttribute) && NS_ERROR_UENC_NOMAPPING == rv);
104 return rv;
105 }
107 NS_IMETHODIMP
108 nsSaveAsCharset::GetCharset(char * *aCharset)
109 {
110 NS_ENSURE_ARG(aCharset);
111 NS_ASSERTION(mCharsetListIndex >= 0, "need to call Init() first");
112 NS_ENSURE_TRUE(mCharsetListIndex >= 0, NS_ERROR_FAILURE);
114 const char* charset = mCharsetList[mCharsetListIndex].get();
115 if (!charset) {
116 *aCharset = nullptr;
117 NS_ASSERTION(charset, "make sure to call Init() with non empty charset list");
118 return NS_ERROR_FAILURE;
119 }
121 *aCharset = strdup(charset);
122 return (*aCharset) ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
123 }
125 /////////////////////////////////////////////////////////////////////////////////////////
127 #define RESERVE_FALLBACK_BYTES 512
129 // do the fallback, reallocate the buffer if necessary
130 // need to pass destination buffer info (size, current position and estimation of rest of the conversion)
131 NS_IMETHODIMP
132 nsSaveAsCharset::HandleFallBack(uint32_t character, char **outString, int32_t *bufferLength,
133 int32_t *currentPos, int32_t estimatedLength)
134 {
135 NS_ENSURE_ARG_POINTER(outString);
136 NS_ENSURE_ARG_POINTER(bufferLength);
137 NS_ENSURE_ARG_POINTER(currentPos);
139 char fallbackStr[256];
140 nsresult rv = DoConversionFallBack(character, fallbackStr, 256);
141 if (NS_SUCCEEDED(rv)) {
142 int32_t tempLen = (int32_t) strlen(fallbackStr);
144 // reallocate if the buffer is not large enough
145 if ((tempLen + estimatedLength) >= (*bufferLength - *currentPos)) {
146 int32_t addLength = tempLen + RESERVE_FALLBACK_BYTES;
147 // + 1 is for the terminating NUL, don't add that to bufferLength
148 char *temp = (char *) PR_Realloc(*outString, *bufferLength + addLength + 1);
149 if (temp) {
150 // adjust length/pointer after realloc
151 *bufferLength += addLength;
152 *outString = temp;
153 } else {
154 *outString = nullptr;
155 *bufferLength = 0;
156 return NS_ERROR_OUT_OF_MEMORY;
157 }
158 }
159 memcpy((*outString + *currentPos), fallbackStr, tempLen);
160 *currentPos += tempLen;
161 }
162 return rv;
163 }
165 NS_IMETHODIMP
166 nsSaveAsCharset::DoCharsetConversion(const char16_t *inString, char **outString)
167 {
168 NS_ENSURE_ARG_POINTER(outString);
170 *outString = nullptr;
172 nsresult rv;
173 int32_t inStringLength = NS_strlen(inString); // original input string length
174 int32_t bufferLength; // allocated buffer length
175 int32_t srcLength = inStringLength;
176 int32_t dstLength;
177 int32_t pos1, pos2;
178 nsresult saveResult = NS_OK; // to remember NS_ERROR_UENC_NOMAPPING
180 // estimate and allocate the target buffer (reserve extra memory for fallback)
181 rv = mEncoder->GetMaxLength(inString, inStringLength, &dstLength);
182 if (NS_FAILED(rv)) return rv;
184 bufferLength = dstLength + RESERVE_FALLBACK_BYTES; // extra bytes for fallback
185 // + 1 is for the terminating NUL -- we don't add that to bufferLength so that
186 // we can always write dstPtr[pos2] = '\0' even when the encoder filled the
187 // buffer.
188 char *dstPtr = (char *) PR_Malloc(bufferLength + 1);
189 if (!dstPtr) {
190 return NS_ERROR_OUT_OF_MEMORY;
191 }
193 for (pos1 = 0, pos2 = 0; pos1 < inStringLength;) {
194 // convert from unicode
195 dstLength = bufferLength - pos2;
196 NS_ASSERTION(dstLength >= 0, "out of bounds write");
197 rv = mEncoder->Convert(&inString[pos1], &srcLength, &dstPtr[pos2], &dstLength);
199 pos1 += srcLength ? srcLength : 1;
200 pos2 += dstLength;
201 dstPtr[pos2] = '\0';
203 // break: this is usually the case (no error) OR unrecoverable error
204 if (NS_ERROR_UENC_NOMAPPING != rv) break;
206 // remember this happened and reset the result
207 saveResult = rv;
208 rv = NS_OK;
210 // finish encoder, give it a chance to write extra data like escape sequences
211 dstLength = bufferLength - pos2;
212 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
213 if (NS_SUCCEEDED(rv)) {
214 pos2 += dstLength;
215 dstPtr[pos2] = '\0';
216 }
218 srcLength = inStringLength - pos1;
220 // do the fallback
221 if (!ATTR_NO_FALLBACK(mAttribute)) {
222 uint32_t unMappedChar;
223 if (NS_IS_HIGH_SURROGATE(inString[pos1-1]) &&
224 inStringLength > pos1 && NS_IS_LOW_SURROGATE(inString[pos1])) {
225 unMappedChar = SURROGATE_TO_UCS4(inString[pos1-1], inString[pos1]);
226 pos1++;
227 } else {
228 unMappedChar = inString[pos1-1];
229 }
231 rv = mEncoder->GetMaxLength(inString+pos1, inStringLength-pos1, &dstLength);
232 if (NS_FAILED(rv))
233 break;
235 rv = HandleFallBack(unMappedChar, &dstPtr, &bufferLength, &pos2, dstLength);
236 if (NS_FAILED(rv))
237 break;
238 dstPtr[pos2] = '\0';
239 }
240 }
242 if (NS_SUCCEEDED(rv)) {
243 // finish encoder, give it a chance to write extra data like escape sequences
244 dstLength = bufferLength - pos2;
245 rv = mEncoder->Finish(&dstPtr[pos2], &dstLength);
246 if (NS_SUCCEEDED(rv)) {
247 pos2 += dstLength;
248 dstPtr[pos2] = '\0';
249 }
250 }
252 if (NS_FAILED(rv)) {
253 PR_FREEIF(dstPtr);
254 return rv;
255 }
257 *outString = dstPtr; // set the result string
259 // set error code so that the caller can do own fall back
260 if (NS_ERROR_UENC_NOMAPPING == saveResult) {
261 rv = NS_ERROR_UENC_NOMAPPING;
262 }
264 return rv;
265 }
267 NS_IMETHODIMP
268 nsSaveAsCharset::DoConversionFallBack(uint32_t inUCS4, char *outString, int32_t bufferLength)
269 {
270 NS_ENSURE_ARG_POINTER(outString);
272 *outString = '\0';
274 nsresult rv = NS_OK;
276 if (ATTR_NO_FALLBACK(mAttribute)) {
277 return NS_OK;
278 }
279 if (attr_EntityAfterCharsetConv == MASK_ENTITY(mAttribute)) {
280 char *entity = nullptr;
281 rv = mEntityConverter->ConvertUTF32ToEntity(inUCS4, mEntityVersion, &entity);
282 if (NS_SUCCEEDED(rv)) {
283 if (!entity || (int32_t)strlen(entity) > bufferLength) {
284 return NS_ERROR_OUT_OF_MEMORY;
285 }
286 PL_strcpy(outString, entity);
287 nsMemory::Free(entity);
288 return rv;
289 }
290 }
292 switch (MASK_FALLBACK(mAttribute)) {
293 case attr_FallbackQuestionMark:
294 if(bufferLength>=2) {
295 *outString++='?';
296 *outString='\0';
297 rv = NS_OK;
298 } else {
299 rv = NS_ERROR_FAILURE;
300 }
301 break;
302 case attr_FallbackEscapeU:
303 if (inUCS4 & 0xff0000)
304 rv = (PR_snprintf(outString, bufferLength, "\\u%.6x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
305 else
306 rv = (PR_snprintf(outString, bufferLength, "\\u%.4x", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
307 break;
308 case attr_FallbackDecimalNCR:
309 rv = ( PR_snprintf(outString, bufferLength, "&#%u;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
310 break;
311 case attr_FallbackHexNCR:
312 rv = (PR_snprintf(outString, bufferLength, "&#x%x;", inUCS4) > 0) ? NS_OK : NS_ERROR_FAILURE;
313 break;
314 case attr_FallbackNone:
315 rv = NS_OK;
316 break;
317 default:
318 rv = NS_ERROR_ILLEGAL_VALUE;
319 break;
320 }
322 return rv;
323 }
325 nsresult nsSaveAsCharset::SetupUnicodeEncoder(const char* charset)
326 {
327 NS_ENSURE_ARG(charset);
328 nsresult rv;
330 // set up unicode encoder
331 nsCOMPtr <nsICharsetConverterManager> ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
332 NS_ENSURE_SUCCESS(rv, rv);
334 return ccm->GetUnicodeEncoder(charset, getter_AddRefs(mEncoder));
335 }
337 nsresult nsSaveAsCharset::SetupCharsetList(const char *charsetList)
338 {
339 NS_ENSURE_ARG(charsetList);
341 NS_ASSERTION(charsetList[0], "charsetList should not be empty");
342 if (!charsetList[0])
343 return NS_ERROR_INVALID_ARG;
345 if (mCharsetListIndex >= 0) {
346 mCharsetList.Clear();
347 mCharsetListIndex = -1;
348 }
350 nsCWhitespaceTokenizer tokenizer = nsDependentCString(charsetList);
351 while (tokenizer.hasMoreTokens()) {
352 ParseString(tokenizer.nextToken(), ',', mCharsetList);
353 }
355 return NS_OK;
356 }
358 const char * nsSaveAsCharset::GetNextCharset()
359 {
360 if ((mCharsetListIndex + 1) >= int32_t(mCharsetList.Length()))
361 return nullptr;
363 // bump the index and return the next charset
364 return mCharsetList[++mCharsetListIndex].get();
365 }