|
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set sw=4 ts=8 et tw=80 : */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #include <string.h> |
|
8 #include "prmem.h" |
|
9 #include "prprf.h" |
|
10 #include "plstr.h" |
|
11 #include "plbase64.h" |
|
12 #include "nsCRT.h" |
|
13 #include "nsMemory.h" |
|
14 #include "nsTArray.h" |
|
15 #include "nsCOMPtr.h" |
|
16 #include "nsEscape.h" |
|
17 #include "nsIUTF8ConverterService.h" |
|
18 #include "nsUConvCID.h" |
|
19 #include "nsIServiceManager.h" |
|
20 #include "nsMIMEHeaderParamImpl.h" |
|
21 #include "nsReadableUtils.h" |
|
22 #include "nsNativeCharsetUtils.h" |
|
23 #include "nsError.h" |
|
24 #include "nsIUnicodeDecoder.h" |
|
25 #include "mozilla/dom/EncodingUtils.h" |
|
26 |
|
27 using mozilla::dom::EncodingUtils; |
|
28 |
|
29 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp |
|
30 |
|
31 static char *DecodeQ(const char *, uint32_t); |
|
32 static bool Is7bitNonAsciiString(const char *, uint32_t); |
|
33 static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &); |
|
34 static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&); |
|
35 static nsresult internalDecodeParameter(const nsACString&, const char*, |
|
36 const char*, bool, bool, nsACString&); |
|
37 |
|
38 // XXX The chance of UTF-7 being used in the message header is really |
|
39 // low, but in theory it's possible. |
|
40 #define IS_7BIT_NON_ASCII_CHARSET(cset) \ |
|
41 (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \ |
|
42 !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \ |
|
43 !nsCRT::strncasecmp((cset), "UTF-7", 5)) |
|
44 |
|
45 NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam) |
|
46 |
|
47 NS_IMETHODIMP |
|
48 nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, |
|
49 const char *aParamName, |
|
50 const nsACString& aFallbackCharset, |
|
51 bool aTryLocaleCharset, |
|
52 char **aLang, nsAString& aResult) |
|
53 { |
|
54 return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING, |
|
55 aFallbackCharset, aTryLocaleCharset, aLang, aResult); |
|
56 } |
|
57 |
|
58 NS_IMETHODIMP |
|
59 nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, |
|
60 const char *aParamName, |
|
61 const nsACString& aFallbackCharset, |
|
62 bool aTryLocaleCharset, |
|
63 char **aLang, nsAString& aResult) |
|
64 { |
|
65 return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING, |
|
66 aFallbackCharset, aTryLocaleCharset, aLang, aResult); |
|
67 } |
|
68 |
|
69 // XXX : aTryLocaleCharset is not yet effective. |
|
70 nsresult |
|
71 nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, |
|
72 const char *aParamName, |
|
73 ParamDecoding aDecoding, |
|
74 const nsACString& aFallbackCharset, |
|
75 bool aTryLocaleCharset, |
|
76 char **aLang, nsAString& aResult) |
|
77 { |
|
78 aResult.Truncate(); |
|
79 nsresult rv; |
|
80 |
|
81 // get parameter (decode RFC 2231/5987 when applicable, as specified by |
|
82 // aDecoding (5987 being a subset of 2231) and return charset.) |
|
83 nsXPIDLCString med; |
|
84 nsXPIDLCString charset; |
|
85 rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, |
|
86 aDecoding, getter_Copies(charset), aLang, |
|
87 getter_Copies(med)); |
|
88 if (NS_FAILED(rv)) |
|
89 return rv; |
|
90 |
|
91 // convert to UTF-8 after charset conversion and RFC 2047 decoding |
|
92 // if necessary. |
|
93 |
|
94 nsAutoCString str1; |
|
95 rv = internalDecodeParameter(med, charset.get(), nullptr, false, |
|
96 // was aDecoding == MIME_FIELD_ENCODING |
|
97 // see bug 875615 |
|
98 true, |
|
99 str1); |
|
100 NS_ENSURE_SUCCESS(rv, rv); |
|
101 |
|
102 if (!aFallbackCharset.IsEmpty()) |
|
103 { |
|
104 nsAutoCString charset; |
|
105 EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset); |
|
106 nsAutoCString str2; |
|
107 nsCOMPtr<nsIUTF8ConverterService> |
|
108 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); |
|
109 if (cvtUTF8 && |
|
110 NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, |
|
111 PromiseFlatCString(aFallbackCharset).get(), false, |
|
112 !charset.EqualsLiteral("UTF-8"), |
|
113 1, str2))) { |
|
114 CopyUTF8toUTF16(str2, aResult); |
|
115 return NS_OK; |
|
116 } |
|
117 } |
|
118 |
|
119 if (IsUTF8(str1)) { |
|
120 CopyUTF8toUTF16(str1, aResult); |
|
121 return NS_OK; |
|
122 } |
|
123 |
|
124 if (aTryLocaleCharset && !NS_IsNativeUTF8()) |
|
125 return NS_CopyNativeToUnicode(str1, aResult); |
|
126 |
|
127 CopyASCIItoUTF16(str1, aResult); |
|
128 return NS_OK; |
|
129 } |
|
130 |
|
// Undo backslash escaping inside a quoted-string, in place.
// Every backslash that is followed by another character is dropped and the
// following character is kept literally; a backslash that is the very last
// character is kept as is.  The string can only get shorter.
void RemoveQuotedStringEscapes(char *src)
{
  const char *in = src;
  char *out = src;

  while (*in) {
    if (*in == '\\' && in[1] != '\0') {
      // drop the backslash, keep what it escapes
      ++in;
    }
    *out++ = *in++;
  }

  *out = '\0';
}
|
148 |
|
// true if character is a hex digit ([0-9a-fA-F])
bool IsHexDigit(char aChar)
{
  char c = aChar;

  return (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F') ||
         (c >= '0' && c <= '9');
}

// Validate that the first |len| characters of aValue contain only
// syntactically valid %-escapes, i.e. every '%' is followed by two hex
// digits.
//
// aValue is treated as a (pointer, length) range and need not be
// null-terminated at |len|.  An escape whose two hex digits do not both lie
// inside the range is rejected; nothing at or beyond aValue[len] is read.
//
// Returns true if all escapes are well-formed (including when len <= 0).
bool IsValidPercentEscaped(const char *aValue, int32_t len)
{
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }
    // need '%' plus two more characters within [i, len)
    if (len - i < 3 ||
        !IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
|
171 |
|
172 // Support for continuations (RFC 2231, Section 3) |
|
173 |
|
174 // only a sane number supported |
|
175 #define MAX_CONTINUATIONS 999 |
|
176 |
|
// One segment of a parameter value that is split into continuations.
// The segment does not own its text: |value| points at |length| characters
// stored elsewhere.  The two flags record which decoding steps still have to
// be applied to the segment when the pieces are joined.
class Continuation {
  public:
    Continuation(const char *aValue, uint32_t aLength,
                 bool aNeedsPercentDecoding, bool aWasQuotedString)
      : value(aValue)
      , length(aLength)
      , needsPercentDecoding(aNeedsPercentDecoding)
      , wasQuotedString(aWasQuotedString)
    {
    }

    // nsTArray requires a default constructor; it yields an empty slot
    // (null value, zero length, no decoding needed).
    Continuation()
      : value(nullptr)
      , length(0)
      , needsPercentDecoding(false)
      , wasQuotedString(false)
    {
    }

    ~Continuation() {}

    const char *value;          // start of the segment text (not owned)
    uint32_t length;            // number of characters at |value|
    bool needsPercentDecoding;  // segment still contains %-escapes
    bool wasQuotedString;       // segment was given as a quoted-string
};
|
202 |
|
203 // combine segments into a single string, returning the allocated string |
|
204 // (or nullptr) while emptying the list |
|
205 char *combineContinuations(nsTArray<Continuation>& aArray) |
|
206 { |
|
207 // Sanity check |
|
208 if (aArray.Length() == 0) |
|
209 return nullptr; |
|
210 |
|
211 // Get an upper bound for the length |
|
212 uint32_t length = 0; |
|
213 for (uint32_t i = 0; i < aArray.Length(); i++) { |
|
214 length += aArray[i].length; |
|
215 } |
|
216 |
|
217 // Allocate |
|
218 char *result = (char *) nsMemory::Alloc(length + 1); |
|
219 |
|
220 // Concatenate |
|
221 if (result) { |
|
222 *result = '\0'; |
|
223 |
|
224 for (uint32_t i = 0; i < aArray.Length(); i++) { |
|
225 Continuation cont = aArray[i]; |
|
226 if (! cont.value) break; |
|
227 |
|
228 char *c = result + strlen(result); |
|
229 strncat(result, cont.value, cont.length); |
|
230 if (cont.needsPercentDecoding) { |
|
231 nsUnescape(c); |
|
232 } |
|
233 if (cont.wasQuotedString) { |
|
234 RemoveQuotedStringEscapes(c); |
|
235 } |
|
236 } |
|
237 |
|
238 // return null if empty value |
|
239 if (*result == '\0') { |
|
240 nsMemory::Free(result); |
|
241 result = nullptr; |
|
242 } |
|
243 } else { |
|
244 // Handle OOM |
|
245 NS_WARNING("Out of memory\n"); |
|
246 } |
|
247 |
|
248 return result; |
|
249 } |
|
250 |
|
251 // add a continuation, return false on error if segment already has been seen |
|
252 bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex, |
|
253 const char *aValue, uint32_t aLength, |
|
254 bool aNeedsPercentDecoding, bool aWasQuotedString) |
|
255 { |
|
256 if (aIndex < aArray.Length() && aArray[aIndex].value) { |
|
257 NS_WARNING("duplicate RC2231 continuation segment #\n"); |
|
258 return false; |
|
259 } |
|
260 |
|
261 if (aIndex > MAX_CONTINUATIONS) { |
|
262 NS_WARNING("RC2231 continuation segment # exceeds limit\n"); |
|
263 return false; |
|
264 } |
|
265 |
|
266 if (aNeedsPercentDecoding && aWasQuotedString) { |
|
267 NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n"); |
|
268 return false; |
|
269 } |
|
270 |
|
271 Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString); |
|
272 |
|
273 if (aArray.Length() <= aIndex) { |
|
274 aArray.SetLength(aIndex + 1); |
|
275 } |
|
276 aArray[aIndex] = cont; |
|
277 |
|
278 return true; |
|
279 } |
|
280 |
|
281 // parse a segment number; return -1 on error |
|
282 int32_t parseSegmentNumber(const char *aValue, int32_t aLen) |
|
283 { |
|
284 if (aLen < 1) { |
|
285 NS_WARNING("segment number missing\n"); |
|
286 return -1; |
|
287 } |
|
288 |
|
289 if (aLen > 1 && aValue[0] == '0') { |
|
290 NS_WARNING("leading '0' not allowed in segment number\n"); |
|
291 return -1; |
|
292 } |
|
293 |
|
294 int32_t segmentNumber = 0; |
|
295 |
|
296 for (int32_t i = 0; i < aLen; i++) { |
|
297 if (! (aValue[i] >= '0' && aValue[i] <= '9')) { |
|
298 NS_WARNING("invalid characters in segment number\n"); |
|
299 return -1; |
|
300 } |
|
301 |
|
302 segmentNumber *= 10; |
|
303 segmentNumber += aValue[i] - '0'; |
|
304 if (segmentNumber > MAX_CONTINUATIONS) { |
|
305 NS_WARNING("Segment number exceeds sane size\n"); |
|
306 return -1; |
|
307 } |
|
308 } |
|
309 |
|
310 return segmentNumber; |
|
311 } |
|
312 |
|
313 // validate a given octet sequence for compliance with the specified |
|
314 // encoding |
|
315 bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets) |
|
316 { |
|
317 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService |
|
318 (NS_UTF8CONVERTERSERVICE_CONTRACTID)); |
|
319 if (!cvtUTF8) { |
|
320 NS_WARNING("Can't get UTF8ConverterService\n"); |
|
321 return false; |
|
322 } |
|
323 |
|
324 nsAutoCString tmpRaw; |
|
325 tmpRaw.Assign(aOctets); |
|
326 nsAutoCString tmpDecoded; |
|
327 |
|
328 nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw, |
|
329 PromiseFlatCString(aCharset).get(), |
|
330 false, false, 1, tmpDecoded); |
|
331 |
|
332 if (rv != NS_OK) { |
|
333 // we can't decode; charset may be unsupported, or the octet sequence |
|
334 // is broken (illegal or incomplete octet sequence contained) |
|
335 NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n"); |
|
336 return false; |
|
337 } |
|
338 |
|
339 return true; |
|
340 } |
|
341 |
|
342 // moved almost verbatim from mimehdrs.cpp |
|
343 // char * |
|
344 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name, |
|
345 // char **charset, char **language) |
|
346 // |
|
347 // The format of these header lines is |
|
348 // <token> [ ';' <token> '=' <token-or-quoted-string> ]* |
|
349 NS_IMETHODIMP |
|
350 nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, |
|
351 const char *aParamName, |
|
352 char **aCharset, |
|
353 char **aLang, |
|
354 char **aResult) |
|
355 { |
|
356 return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING, |
|
357 aCharset, aLang, aResult); |
|
358 } |
|
359 |
|
360 |
|
361 nsresult |
|
362 nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, |
|
363 const char *aParamName, |
|
364 ParamDecoding aDecoding, |
|
365 char **aCharset, |
|
366 char **aLang, |
|
367 char **aResult) |
|
368 { |
|
369 |
|
370 if (!aHeaderValue || !*aHeaderValue || !aResult) |
|
371 return NS_ERROR_INVALID_ARG; |
|
372 |
|
373 *aResult = nullptr; |
|
374 |
|
375 if (aCharset) *aCharset = nullptr; |
|
376 if (aLang) *aLang = nullptr; |
|
377 |
|
378 nsAutoCString charset; |
|
379 |
|
380 // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable |
|
381 // them for HTTP header fields later on, see bug 776324 |
|
382 bool acceptContinuations = true; |
|
383 |
|
384 const char *str = aHeaderValue; |
|
385 |
|
386 // skip leading white space. |
|
387 for (; *str && nsCRT::IsAsciiSpace(*str); ++str) |
|
388 ; |
|
389 const char *start = str; |
|
390 |
|
391 // aParamName is empty. return the first (possibly) _unnamed_ 'parameter' |
|
392 // For instance, return 'inline' in the following case: |
|
393 // Content-Disposition: inline; filename=..... |
|
394 if (!aParamName || !*aParamName) |
|
395 { |
|
396 for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str) |
|
397 ; |
|
398 if (str == start) |
|
399 return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY; |
|
400 |
|
401 *aResult = (char *) nsMemory::Clone(start, (str - start) + 1); |
|
402 NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY); |
|
403 (*aResult)[str - start] = '\0'; // null-terminate |
|
404 return NS_OK; |
|
405 } |
|
406 |
|
407 /* Skip forward to first ';' */ |
|
408 for (; *str && *str != ';' && *str != ','; ++str) |
|
409 ; |
|
410 if (*str) |
|
411 str++; |
|
412 /* Skip over following whitespace */ |
|
413 for (; *str && nsCRT::IsAsciiSpace(*str); ++str) |
|
414 ; |
|
415 |
|
416 // Some broken http servers just specify parameters |
|
417 // like 'filename' without specifying disposition |
|
418 // method. Rewind to the first non-white-space |
|
419 // character. |
|
420 |
|
421 if (!*str) |
|
422 str = start; |
|
423 |
|
424 // RFC2231 - The legitimate parm format can be: |
|
425 // A. title=ThisIsTitle |
|
426 // B. title*=us-ascii'en-us'This%20is%20wierd. |
|
427 // C. title*0*=us-ascii'en'This%20is%20wierd.%20We |
|
428 // title*1*=have%20to%20support%20this. |
|
429 // title*2="Else..." |
|
430 // D. title*0="Hey, what you think you are doing?" |
|
431 // title*1="There is no charset and lang info." |
|
432 // RFC5987: only A and B |
|
433 |
|
434 // collect results for the different algorithms (plain filename, |
|
435 // RFC5987/2231-encoded filename, + continuations) separately and decide |
|
436 // which to use at the end |
|
437 char *caseAResult = nullptr; |
|
438 char *caseBResult = nullptr; |
|
439 char *caseCDResult = nullptr; |
|
440 |
|
441 // collect continuation segments |
|
442 nsTArray<Continuation> segments; |
|
443 |
|
444 |
|
445 // our copies of the charset parameter, kept separately as they might |
|
446 // differ for the two formats |
|
447 nsDependentCSubstring charsetB, charsetCD; |
|
448 |
|
449 nsDependentCSubstring lang; |
|
450 |
|
451 int32_t paramLen = strlen(aParamName); |
|
452 |
|
453 while (*str) { |
|
454 // find name/value |
|
455 |
|
456 const char *nameStart = str; |
|
457 const char *nameEnd = nullptr; |
|
458 const char *valueStart = str; |
|
459 const char *valueEnd = nullptr; |
|
460 bool isQuotedString = false; |
|
461 |
|
462 NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace."); |
|
463 |
|
464 // Skip forward to the end of this token. |
|
465 for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++) |
|
466 ; |
|
467 nameEnd = str; |
|
468 |
|
469 int32_t nameLen = nameEnd - nameStart; |
|
470 |
|
471 // Skip over whitespace, '=', and whitespace |
|
472 while (nsCRT::IsAsciiSpace(*str)) ++str; |
|
473 if (!*str) { |
|
474 break; |
|
475 } |
|
476 if (*str++ != '=') { |
|
477 // don't accept parameters without "=" |
|
478 goto increment_str; |
|
479 } |
|
480 while (nsCRT::IsAsciiSpace(*str)) ++str; |
|
481 |
|
482 if (*str != '"') { |
|
483 // The value is a token, not a quoted string. |
|
484 valueStart = str; |
|
485 for (valueEnd = str; |
|
486 *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';'; |
|
487 valueEnd++) |
|
488 ; |
|
489 str = valueEnd; |
|
490 } else { |
|
491 isQuotedString = true; |
|
492 |
|
493 ++str; |
|
494 valueStart = str; |
|
495 for (valueEnd = str; *valueEnd; ++valueEnd) { |
|
496 if (*valueEnd == '\\' && *(valueEnd + 1)) |
|
497 ++valueEnd; |
|
498 else if (*valueEnd == '"') |
|
499 break; |
|
500 } |
|
501 str = valueEnd; |
|
502 // *valueEnd != null means that *valueEnd is quote character. |
|
503 if (*valueEnd) |
|
504 str++; |
|
505 } |
|
506 |
|
507 // See if this is the simplest case (case A above), |
|
508 // a 'single' line value with no charset and lang. |
|
509 // If so, copy it and return. |
|
510 if (nameLen == paramLen && |
|
511 !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) { |
|
512 |
|
513 if (caseAResult) { |
|
514 // we already have one caseA result, ignore subsequent ones |
|
515 goto increment_str; |
|
516 } |
|
517 |
|
518 // if the parameter spans across multiple lines we have to strip out the |
|
519 // line continuation -- jht 4/29/98 |
|
520 nsAutoCString tempStr(valueStart, valueEnd - valueStart); |
|
521 tempStr.StripChars("\r\n"); |
|
522 char *res = ToNewCString(tempStr); |
|
523 NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY); |
|
524 |
|
525 if (isQuotedString) |
|
526 RemoveQuotedStringEscapes(res); |
|
527 |
|
528 caseAResult = res; |
|
529 // keep going, we may find a RFC 2231/5987 encoded alternative |
|
530 } |
|
531 // case B, C, and D |
|
532 else if (nameLen > paramLen && |
|
533 !nsCRT::strncasecmp(nameStart, aParamName, paramLen) && |
|
534 *(nameStart + paramLen) == '*') { |
|
535 |
|
536 // 1st char past '*' |
|
537 const char *cp = nameStart + paramLen + 1; |
|
538 |
|
539 // if param name ends in "*" we need do to RFC5987 "ext-value" decoding |
|
540 bool needExtDecoding = *(nameEnd - 1) == '*'; |
|
541 |
|
542 bool caseB = nameLen == paramLen + 1; |
|
543 bool caseCStart = (*cp == '0') && needExtDecoding; |
|
544 |
|
545 // parse the segment number |
|
546 int32_t segmentNumber = -1; |
|
547 if (!caseB) { |
|
548 int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0); |
|
549 segmentNumber = parseSegmentNumber(cp, segLen); |
|
550 |
|
551 if (segmentNumber == -1) { |
|
552 acceptContinuations = false; |
|
553 goto increment_str; |
|
554 } |
|
555 } |
|
556 |
|
557 // CaseB and start of CaseC: requires charset and optional language |
|
558 // in quotes (quotes required even if lang is blank) |
|
559 if (caseB || (caseCStart && acceptContinuations)) { |
|
560 // look for single quotation mark(') |
|
561 const char *sQuote1 = PL_strchr(valueStart, 0x27); |
|
562 const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr; |
|
563 |
|
564 // Two single quotation marks must be present even in |
|
565 // absence of charset and lang. |
|
566 if (!sQuote1 || !sQuote2) { |
|
567 NS_WARNING("Mandatory two single quotes are missing in header parameter\n"); |
|
568 } |
|
569 |
|
570 const char *charsetStart = nullptr; |
|
571 int32_t charsetLength = 0; |
|
572 const char *langStart = nullptr; |
|
573 int32_t langLength = 0; |
|
574 const char *rawValStart = nullptr; |
|
575 int32_t rawValLength = 0; |
|
576 |
|
577 if (sQuote2 && sQuote1) { |
|
578 // both delimiters present: charSet'lang'rawVal |
|
579 rawValStart = sQuote2 + 1; |
|
580 rawValLength = valueEnd - rawValStart; |
|
581 |
|
582 langStart = sQuote1 + 1; |
|
583 langLength = sQuote2 - langStart; |
|
584 |
|
585 charsetStart = valueStart; |
|
586 charsetLength = sQuote1 - charsetStart; |
|
587 } |
|
588 else if (sQuote1) { |
|
589 // one delimiter; assume charset'rawVal |
|
590 rawValStart = sQuote1 + 1; |
|
591 rawValLength = valueEnd - rawValStart; |
|
592 |
|
593 charsetStart = valueStart; |
|
594 charsetLength = sQuote1 - valueStart; |
|
595 } |
|
596 else { |
|
597 // no delimiter: just rawVal |
|
598 rawValStart = valueStart; |
|
599 rawValLength = valueEnd - valueStart; |
|
600 } |
|
601 |
|
602 if (langLength != 0) { |
|
603 lang.Assign(langStart, langLength); |
|
604 } |
|
605 |
|
606 // keep the charset for later |
|
607 if (caseB) { |
|
608 charsetB.Assign(charsetStart, charsetLength); |
|
609 } else { |
|
610 // if caseCorD |
|
611 charsetCD.Assign(charsetStart, charsetLength); |
|
612 } |
|
613 |
|
614 // non-empty value part |
|
615 if (rawValLength > 0) { |
|
616 if (!caseBResult && caseB) { |
|
617 if (!IsValidPercentEscaped(rawValStart, rawValLength)) { |
|
618 goto increment_str; |
|
619 } |
|
620 |
|
621 // allocate buffer for the raw value |
|
622 char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1); |
|
623 if (!tmpResult) { |
|
624 goto increment_str; |
|
625 } |
|
626 *(tmpResult + rawValLength) = 0; |
|
627 |
|
628 nsUnescape(tmpResult); |
|
629 caseBResult = tmpResult; |
|
630 } else { |
|
631 // caseC |
|
632 bool added = addContinuation(segments, 0, rawValStart, |
|
633 rawValLength, needExtDecoding, |
|
634 isQuotedString); |
|
635 |
|
636 if (!added) { |
|
637 // continuation not added, stop processing them |
|
638 acceptContinuations = false; |
|
639 } |
|
640 } |
|
641 } |
|
642 } // end of if-block : title*0*= or title*= |
|
643 // caseD: a line of multiline param with no need for unescaping : title*[0-9]= |
|
644 // or 2nd or later lines of a caseC param : title*[1-9]*= |
|
645 else if (acceptContinuations && segmentNumber != -1) { |
|
646 uint32_t valueLength = valueEnd - valueStart; |
|
647 |
|
648 bool added = addContinuation(segments, segmentNumber, valueStart, |
|
649 valueLength, needExtDecoding, |
|
650 isQuotedString); |
|
651 |
|
652 if (!added) { |
|
653 // continuation not added, stop processing them |
|
654 acceptContinuations = false; |
|
655 } |
|
656 } // end of if-block : title*[0-9]= or title*[1-9]*= |
|
657 } |
|
658 |
|
659 // str now points after the end of the value. |
|
660 // skip over whitespace, ';', whitespace. |
|
661 increment_str: |
|
662 while (nsCRT::IsAsciiSpace(*str)) ++str; |
|
663 if (*str == ';') { |
|
664 ++str; |
|
665 } else { |
|
666 // stop processing the header field; either we are done or the |
|
667 // separator was missing |
|
668 break; |
|
669 } |
|
670 while (nsCRT::IsAsciiSpace(*str)) ++str; |
|
671 } |
|
672 |
|
673 caseCDResult = combineContinuations(segments); |
|
674 |
|
675 if (caseBResult && !charsetB.IsEmpty()) { |
|
676 // check that the 2231/5987 result decodes properly given the |
|
677 // specified character set |
|
678 if (!IsValidOctetSequenceForCharset(charsetB, caseBResult)) |
|
679 caseBResult = nullptr; |
|
680 } |
|
681 |
|
682 if (caseCDResult && !charsetCD.IsEmpty()) { |
|
683 // check that the 2231/5987 result decodes properly given the |
|
684 // specified character set |
|
685 if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult)) |
|
686 caseCDResult = nullptr; |
|
687 } |
|
688 |
|
689 if (caseBResult) { |
|
690 // prefer simple 5987 format over 2231 with continuations |
|
691 *aResult = caseBResult; |
|
692 caseBResult = nullptr; |
|
693 charset.Assign(charsetB); |
|
694 } |
|
695 else if (caseCDResult) { |
|
696 // prefer 2231/5987 with or without continuations over plain format |
|
697 *aResult = caseCDResult; |
|
698 caseCDResult = nullptr; |
|
699 charset.Assign(charsetCD); |
|
700 } |
|
701 else if (caseAResult) { |
|
702 *aResult = caseAResult; |
|
703 caseAResult = nullptr; |
|
704 } |
|
705 |
|
706 // free unused stuff |
|
707 nsMemory::Free(caseAResult); |
|
708 nsMemory::Free(caseBResult); |
|
709 nsMemory::Free(caseCDResult); |
|
710 |
|
711 // if we have a result |
|
712 if (*aResult) { |
|
713 // then return charset and lang as well |
|
714 if (aLang && !lang.IsEmpty()) { |
|
715 uint32_t len = lang.Length(); |
|
716 *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1); |
|
717 if (*aLang) { |
|
718 *(*aLang + len) = 0; |
|
719 } |
|
720 } |
|
721 if (aCharset && !charset.IsEmpty()) { |
|
722 uint32_t len = charset.Length(); |
|
723 *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1); |
|
724 if (*aCharset) { |
|
725 *(*aCharset + len) = 0; |
|
726 } |
|
727 } |
|
728 } |
|
729 |
|
730 return *aResult ? NS_OK : NS_ERROR_INVALID_ARG; |
|
731 } |
|
732 |
|
733 nsresult |
|
734 internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset, |
|
735 bool aOverrideCharset, bool aEatContinuations, |
|
736 nsACString& aResult) |
|
737 { |
|
738 aResult.Truncate(); |
|
739 if (!aHeaderVal) |
|
740 return NS_ERROR_INVALID_ARG; |
|
741 if (!*aHeaderVal) |
|
742 return NS_OK; |
|
743 |
|
744 |
|
745 // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but |
|
746 // aDefaultCharset is specified, decodes RFC 2047 encoding and converts |
|
747 // to UTF-8. Otherwise, just strips away CRLF. |
|
748 if (PL_strstr(aHeaderVal, "=?") || |
|
749 (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || |
|
750 Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) { |
|
751 DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult); |
|
752 } else if (aEatContinuations && |
|
753 (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) { |
|
754 aResult = aHeaderVal; |
|
755 } else { |
|
756 aEatContinuations = false; |
|
757 aResult = aHeaderVal; |
|
758 } |
|
759 |
|
760 if (aEatContinuations) { |
|
761 nsAutoCString temp(aResult); |
|
762 temp.ReplaceSubstring("\n\t", " "); |
|
763 temp.ReplaceSubstring("\r\t", " "); |
|
764 temp.StripChars("\r\n"); |
|
765 aResult = temp; |
|
766 } |
|
767 |
|
768 return NS_OK; |
|
769 } |
|
770 |
|
771 NS_IMETHODIMP |
|
772 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, |
|
773 const char* aDefaultCharset, |
|
774 bool aOverrideCharset, |
|
775 bool aEatContinuations, |
|
776 nsACString& aResult) |
|
777 { |
|
778 return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset, |
|
779 aOverrideCharset, aEatContinuations, |
|
780 aResult); |
|
781 } |
|
782 |
|
// Tells whether aChar may appear unescaped in an RFC 5987 value, i.e. whether
// it is an "attr-char" (RFC 5987, Section 3.2.1): an ASCII letter, a digit,
// or one of  ! # $ & + - . ^ _ ` | ~
bool IsRFC5987AttrChar(char aChar)
{
  const bool isLetter = (aChar >= 'a' && aChar <= 'z') ||
                        (aChar >= 'A' && aChar <= 'Z');
  const bool isDigit = aChar >= '0' && aChar <= '9';
  if (isLetter || isDigit) {
    return true;
  }

  switch (aChar) {
    case '!': case '#': case '$': case '&':
    case '+': case '-': case '.': case '^':
    case '_': case '`': case '|': case '~':
      return true;
    default:
      return false;
  }
}
|
796 |
|
797 // percent-decode a value |
|
798 // returns false on failure |
|
799 bool PercentDecode(nsACString& aValue) |
|
800 { |
|
801 char *c = (char *) nsMemory::Alloc(aValue.Length() + 1); |
|
802 if (!c) { |
|
803 return false; |
|
804 } |
|
805 |
|
806 strcpy(c, PromiseFlatCString(aValue).get()); |
|
807 nsUnescape(c); |
|
808 aValue.Assign(c); |
|
809 nsMemory::Free(c); |
|
810 |
|
811 return true; |
|
812 } |
|
813 |
|
814 // Decode a parameter value using the encoding defined in RFC 5987 |
|
815 // |
|
816 // charset "'" [ language ] "'" value-chars |
|
817 NS_IMETHODIMP |
|
818 nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal, |
|
819 nsACString& aLang, |
|
820 nsAString& aResult) |
|
821 { |
|
822 nsAutoCString charset; |
|
823 nsAutoCString language; |
|
824 nsAutoCString value; |
|
825 |
|
826 uint32_t delimiters = 0; |
|
827 const char *encoded = PromiseFlatCString(aParamVal).get(); |
|
828 const char *c = encoded; |
|
829 |
|
830 while (*c) { |
|
831 char tc = *c++; |
|
832 |
|
833 if (tc == '\'') { |
|
834 // single quote |
|
835 delimiters++; |
|
836 } else if (((unsigned char)tc) >= 128) { |
|
837 // fail early, not ASCII |
|
838 NS_WARNING("non-US-ASCII character in RFC5987-encoded param"); |
|
839 return NS_ERROR_INVALID_ARG; |
|
840 } else { |
|
841 if (delimiters == 0) { |
|
842 // valid characters are checked later implicitly |
|
843 charset.Append(tc); |
|
844 } else if (delimiters == 1) { |
|
845 // no value checking for now |
|
846 language.Append(tc); |
|
847 } else if (delimiters == 2) { |
|
848 if (IsRFC5987AttrChar(tc)) { |
|
849 value.Append(tc); |
|
850 } else if (tc == '%') { |
|
851 if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) { |
|
852 // we expect two more characters |
|
853 NS_WARNING("broken %-escape in RFC5987-encoded param"); |
|
854 return NS_ERROR_INVALID_ARG; |
|
855 } |
|
856 value.Append(tc); |
|
857 // we consume two more |
|
858 value.Append(*c++); |
|
859 value.Append(*c++); |
|
860 } else { |
|
861 // character not allowed here |
|
862 NS_WARNING("invalid character in RFC5987-encoded param"); |
|
863 return NS_ERROR_INVALID_ARG; |
|
864 } |
|
865 } |
|
866 } |
|
867 } |
|
868 |
|
869 if (delimiters != 2) { |
|
870 NS_WARNING("missing delimiters in RFC5987-encoded param"); |
|
871 return NS_ERROR_INVALID_ARG; |
|
872 } |
|
873 |
|
874 // abort early for unsupported encodings |
|
875 if (!charset.LowerCaseEqualsLiteral("utf-8")) { |
|
876 NS_WARNING("unsupported charset in RFC5987-encoded param"); |
|
877 return NS_ERROR_INVALID_ARG; |
|
878 } |
|
879 |
|
880 // percent-decode |
|
881 if (!PercentDecode(value)) { |
|
882 return NS_ERROR_OUT_OF_MEMORY; |
|
883 } |
|
884 |
|
885 // return the encoding |
|
886 aLang.Assign(language); |
|
887 |
|
888 // finally convert octet sequence to UTF-8 and be done |
|
889 nsresult rv = NS_OK; |
|
890 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 = |
|
891 do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv); |
|
892 NS_ENSURE_SUCCESS(rv, rv); |
|
893 |
|
894 nsAutoCString utf8; |
|
895 rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8); |
|
896 NS_ENSURE_SUCCESS(rv, rv); |
|
897 |
|
898 CopyUTF8toUTF16(utf8, aResult); |
|
899 return NS_OK; |
|
900 } |
|
901 |
|
902 nsresult |
|
903 internalDecodeParameter(const nsACString& aParamValue, const char* aCharset, |
|
904 const char* aDefaultCharset, bool aOverrideCharset, |
|
905 bool aDecode2047, nsACString& aResult) |
|
906 { |
|
907 aResult.Truncate(); |
|
908 // If aCharset is given, aParamValue was obtained from RFC2231/5987 |
|
909 // encoding and we're pretty sure that it's in aCharset. |
|
910 if (aCharset && *aCharset) |
|
911 { |
|
912 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); |
|
913 if (cvtUTF8) |
|
914 return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset, |
|
915 true, true, 1, aResult); |
|
916 } |
|
917 |
|
918 const nsAFlatCString& param = PromiseFlatCString(aParamValue); |
|
919 nsAutoCString unQuoted; |
|
920 nsACString::const_iterator s, e; |
|
921 param.BeginReading(s); |
|
922 param.EndReading(e); |
|
923 |
|
924 // strip '\' when used to quote CR, LF, '"' and '\' |
|
925 for ( ; s != e; ++s) { |
|
926 if ((*s == '\\')) { |
|
927 if (++s == e) { |
|
928 --s; // '\' is at the end. move back and append '\'. |
|
929 } |
|
930 else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') { |
|
931 --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\' |
|
932 } |
|
933 // else : skip '\' and append the quoted character. |
|
934 } |
|
935 unQuoted.Append(*s); |
|
936 } |
|
937 |
|
938 aResult = unQuoted; |
|
939 nsresult rv = NS_OK; |
|
940 |
|
941 if (aDecode2047) { |
|
942 nsAutoCString decoded; |
|
943 |
|
944 // Try RFC 2047 encoding, instead. |
|
945 rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset, |
|
946 aOverrideCharset, true, decoded); |
|
947 |
|
948 if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) |
|
949 aResult = decoded; |
|
950 } |
|
951 |
|
952 return rv; |
|
953 } |
|
954 |
|
955 NS_IMETHODIMP |
|
956 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue, |
|
957 const char* aCharset, |
|
958 const char* aDefaultCharset, |
|
959 bool aOverrideCharset, |
|
960 nsACString& aResult) |
|
961 { |
|
962 return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset, |
|
963 aOverrideCharset, true, aResult); |
|
964 } |
|
965 |
|
966 #define ISHEXCHAR(c) \ |
|
967 ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \ |
|
968 (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \ |
|
969 (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66)) |
|
970 |
|
971 // Decode Q encoding (RFC 2047). |
|
972 // static |
|
973 char *DecodeQ(const char *in, uint32_t length) |
|
974 { |
|
975 char *out, *dest = 0; |
|
976 |
|
977 out = dest = (char *)PR_Calloc(length + 1, sizeof(char)); |
|
978 if (dest == nullptr) |
|
979 return nullptr; |
|
980 while (length > 0) { |
|
981 unsigned c = 0; |
|
982 switch (*in) { |
|
983 case '=': |
|
984 // check if |in| in the form of '=hh' where h is [0-9a-fA-F]. |
|
985 if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) |
|
986 goto badsyntax; |
|
987 PR_sscanf(in + 1, "%2X", &c); |
|
988 *out++ = (char) c; |
|
989 in += 3; |
|
990 length -= 3; |
|
991 break; |
|
992 |
|
993 case '_': |
|
994 *out++ = ' '; |
|
995 in++; |
|
996 length--; |
|
997 break; |
|
998 |
|
999 default: |
|
1000 if (*in & 0x80) goto badsyntax; |
|
1001 *out++ = *in++; |
|
1002 length--; |
|
1003 } |
|
1004 } |
|
1005 *out++ = '\0'; |
|
1006 |
|
1007 for (out = dest; *out ; ++out) { |
|
1008 if (*out == '\t') |
|
1009 *out = ' '; |
|
1010 } |
|
1011 |
|
1012 return dest; |
|
1013 |
|
1014 badsyntax: |
|
1015 PR_Free(dest); |
|
1016 return nullptr; |
|
1017 } |
|
1018 |
|
// Heuristic check for text in a 7-bit non-ASCII encoding.
//
// Scans the first |len| octets of |input| and returns
//   - false as soon as an octet with the high bit set is found,
//   - true  as soon as an ESC (0x1B) is found, which may indicate one of the
//           ISO 2022 7-bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468,
//           1922, 1554),
//   - otherwise true only if the text looks like HZ (a 7-bit encoding for
//     simplified Chinese : see RFC 1842, 1843), i.e. the scan ended right
//     after a complete "~{ ... ~}" sequence or a "~~" escape without hitting
//     an illegal '~' usage.
//
// Only octets inside [input, input + len) are examined. A '~' that is the
// very last octet is treated as if it were followed by NUL, so the look-ahead
// never reads past the buffer and the length counter cannot wrap around.
// static
bool Is7bitNonAsciiString(const char *input, uint32_t len)
{
  enum { hz_initial,    // No HZ seen yet
         hz_escaped,    // Inside an HZ ~{ escape sequence
         hz_seen,       // Have seen at least one complete HZ sequence
         hz_notpresent  // Have seen something that is not legal HZ
  } hz_state = hz_initial;

  while (len) {
    const int32_t c = uint8_t(*input++);
    len--;
    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c == '~') {
      // Peek at the octet after '~' only when it is still part of the input.
      const char next = len ? *input : '\0';
      switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (next == '{') {
          hz_state = hz_escaped;
        } else if (next == '~') {
          // ~~ is the HZ encoding of ~. Skip over second ~ as well.
          // (next == '~' implies len > 0, so this cannot underflow.)
          hz_state = hz_seen;
          input++;
          len--;
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (next == '}') hz_state = hz_seen;
        break;

      default:
        // hz_notpresent is sticky: nothing can make the text HZ again.
        break;
      }
    }
  }
  return hz_state == hz_seen;
}
|
1065 |
|
1066 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD) |
|
1067 |
|
1068 // copy 'raw' sequences of octets in aInput to aOutput. |
|
1069 // If aDefaultCharset is specified, the input is assumed to be in the |
|
1070 // charset and converted to UTF-8. Otherwise, a blind copy is made. |
|
1071 // If aDefaultCharset is specified, but the conversion to UTF-8 |
|
1072 // is not successful, each octet is replaced by Unicode replacement |
|
1073 // chars. *aOutput is advanced by the number of output octets. |
|
1074 // static |
|
1075 void CopyRawHeader(const char *aInput, uint32_t aLen, |
|
1076 const char *aDefaultCharset, nsACString &aOutput) |
|
1077 { |
|
1078 int32_t c; |
|
1079 |
|
1080 // If aDefaultCharset is not specified, make a blind copy. |
|
1081 if (!aDefaultCharset || !*aDefaultCharset) { |
|
1082 aOutput.Append(aInput, aLen); |
|
1083 return; |
|
1084 } |
|
1085 |
|
1086 // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022 |
|
1087 // A ~ may indicate it is HZ |
|
1088 while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) { |
|
1089 aOutput.Append(char(c)); |
|
1090 aLen--; |
|
1091 } |
|
1092 if (!aLen) { |
|
1093 return; |
|
1094 } |
|
1095 aInput--; |
|
1096 |
|
1097 // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii |
|
1098 // string and aDefaultCharset is a 7bit non-ascii charset. |
|
1099 bool skipCheck = (c == 0x1B || c == '~') && |
|
1100 IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset); |
|
1101 |
|
1102 // If not UTF-8, treat as default charset |
|
1103 nsCOMPtr<nsIUTF8ConverterService> |
|
1104 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); |
|
1105 nsAutoCString utf8Text; |
|
1106 if (cvtUTF8 && |
|
1107 NS_SUCCEEDED( |
|
1108 cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), |
|
1109 aDefaultCharset, skipCheck, true, 1, |
|
1110 utf8Text))) { |
|
1111 aOutput.Append(utf8Text); |
|
1112 } else { // replace each octet with Unicode replacement char in UTF-8. |
|
1113 for (uint32_t i = 0; i < aLen; i++) { |
|
1114 c = uint8_t(*aInput++); |
|
1115 if (c & 0x80) |
|
1116 aOutput.Append(REPLACEMENT_CHAR); |
|
1117 else |
|
1118 aOutput.Append(char(c)); |
|
1119 } |
|
1120 } |
|
1121 } |
|
1122 |
|
1123 nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64, |
|
1124 const char *aCharset, nsACString &aResult) |
|
1125 { |
|
1126 char *decodedText; |
|
1127 NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'"); |
|
1128 if(aQOrBase64 == 'Q') |
|
1129 decodedText = DecodeQ(aEncoded, aLen); |
|
1130 else if (aQOrBase64 == 'B') { |
|
1131 decodedText = PL_Base64Decode(aEncoded, aLen, nullptr); |
|
1132 } else { |
|
1133 return NS_ERROR_INVALID_ARG; |
|
1134 } |
|
1135 |
|
1136 if (!decodedText) { |
|
1137 return NS_ERROR_INVALID_ARG; |
|
1138 } |
|
1139 |
|
1140 nsresult rv; |
|
1141 nsCOMPtr<nsIUTF8ConverterService> |
|
1142 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv)); |
|
1143 nsAutoCString utf8Text; |
|
1144 if (NS_SUCCEEDED(rv)) { |
|
1145 // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset. |
|
1146 rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText), |
|
1147 aCharset, |
|
1148 IS_7BIT_NON_ASCII_CHARSET(aCharset), |
|
1149 true, 1, utf8Text); |
|
1150 } |
|
1151 PR_Free(decodedText); |
|
1152 if (NS_FAILED(rv)) { |
|
1153 return rv; |
|
1154 } |
|
1155 aResult.Append(utf8Text); |
|
1156 |
|
1157 return NS_OK; |
|
1158 } |
|
1159 |
|
1160 static const char especials[] = "()<>@,;:\\\"/[]?.="; |
|
1161 |
|
1162 // |decode_mime_part2_str| taken from comi18n.c |
|
1163 // Decode RFC2047-encoded words in the input and convert the result to UTF-8. |
|
1164 // If aOverrideCharset is true, charset in RFC2047-encoded words is |
|
1165 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset |
|
1166 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8. |
|
1167 //static |
|
1168 nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, |
|
1169 bool aOverrideCharset, nsACString &aResult) |
|
1170 { |
|
1171 const char *p, *q = nullptr, *r; |
|
1172 const char *begin; // tracking pointer for where we are in the input buffer |
|
1173 int32_t isLastEncodedWord = 0; |
|
1174 const char *charsetStart, *charsetEnd; |
|
1175 nsAutoCString prevCharset, curCharset; |
|
1176 nsAutoCString encodedText; |
|
1177 char prevEncoding = '\0', curEncoding; |
|
1178 nsresult rv; |
|
1179 |
|
1180 begin = aHeader; |
|
1181 |
|
1182 // To avoid buffer realloc, if possible, set capacity in advance. No |
|
1183 // matter what, more than 3x expansion can never happen for all charsets |
|
1184 // supported by Mozilla. SCSU/BCSU with the sliding window set to a |
|
1185 // non-BMP block may be exceptions, but Mozilla does not support them. |
|
1186 // Neither any known mail/news program use them. Even if there's, we're |
|
1187 // safe because we don't use a raw *char any more. |
|
1188 aResult.SetCapacity(3 * strlen(aHeader)); |
|
1189 |
|
1190 while ((p = PL_strstr(begin, "=?")) != 0) { |
|
1191 if (isLastEncodedWord) { |
|
1192 // See if it's all whitespace. |
|
1193 for (q = begin; q < p; ++q) { |
|
1194 if (!PL_strchr(" \t\r\n", *q)) break; |
|
1195 } |
|
1196 } |
|
1197 |
|
1198 if (!isLastEncodedWord || q < p) { |
|
1199 if (!encodedText.IsEmpty()) { |
|
1200 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
|
1201 prevEncoding, prevCharset.get(), aResult); |
|
1202 if (NS_FAILED(rv)) { |
|
1203 aResult.Append(encodedText); |
|
1204 } |
|
1205 encodedText.Truncate(); |
|
1206 prevCharset.Truncate(); |
|
1207 prevEncoding = '\0'; |
|
1208 } |
|
1209 // copy the part before the encoded-word |
|
1210 CopyRawHeader(begin, p - begin, aDefaultCharset, aResult); |
|
1211 begin = p; |
|
1212 } |
|
1213 |
|
1214 p += 2; |
|
1215 |
|
1216 // Get charset info |
|
1217 charsetStart = p; |
|
1218 charsetEnd = 0; |
|
1219 for (q = p; *q != '?'; q++) { |
|
1220 if (*q <= ' ' || PL_strchr(especials, *q)) { |
|
1221 goto badsyntax; |
|
1222 } |
|
1223 |
|
1224 // RFC 2231 section 5 |
|
1225 if (!charsetEnd && *q == '*') { |
|
1226 charsetEnd = q; |
|
1227 } |
|
1228 } |
|
1229 if (!charsetEnd) { |
|
1230 charsetEnd = q; |
|
1231 } |
|
1232 |
|
1233 q++; |
|
1234 curEncoding = nsCRT::ToUpper(*q); |
|
1235 if (curEncoding != 'Q' && curEncoding != 'B') |
|
1236 goto badsyntax; |
|
1237 |
|
1238 if (q[1] != '?') |
|
1239 goto badsyntax; |
|
1240 |
|
1241 r = q; |
|
1242 for (r = q + 2; *r != '?'; r++) { |
|
1243 if (*r < ' ') goto badsyntax; |
|
1244 } |
|
1245 if (r[1] != '=') |
|
1246 goto badsyntax; |
|
1247 else if (r == q + 2) { |
|
1248 // it's empty, skip |
|
1249 begin = r + 2; |
|
1250 isLastEncodedWord = 1; |
|
1251 continue; |
|
1252 } |
|
1253 |
|
1254 curCharset.Assign(charsetStart, charsetEnd - charsetStart); |
|
1255 // Override charset if requested. Never override labeled UTF-8. |
|
1256 // Use default charset instead of UNKNOWN-8BIT |
|
1257 if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) |
|
1258 || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT")) |
|
1259 ) { |
|
1260 curCharset = aDefaultCharset; |
|
1261 } |
|
1262 |
|
1263 const char *R; |
|
1264 R = r; |
|
1265 if (curEncoding == 'B') { |
|
1266 // bug 227290. ignore an extraneous '=' at the end. |
|
1267 // (# of characters in B-encoded part has to be a multiple of 4) |
|
1268 int32_t n = r - (q + 2); |
|
1269 R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0; |
|
1270 } |
|
1271 // Bug 493544. Don't decode the encoded text until it ends |
|
1272 if (R[-1] != '=' |
|
1273 && (prevCharset.IsEmpty() |
|
1274 || (curCharset == prevCharset && curEncoding == prevEncoding)) |
|
1275 ) { |
|
1276 encodedText.Append(q + 2, R - (q + 2)); |
|
1277 prevCharset = curCharset; |
|
1278 prevEncoding = curEncoding; |
|
1279 |
|
1280 begin = r + 2; |
|
1281 isLastEncodedWord = 1; |
|
1282 continue; |
|
1283 } |
|
1284 |
|
1285 bool bDecoded; // If the current line has been decoded. |
|
1286 bDecoded = false; |
|
1287 if (!encodedText.IsEmpty()) { |
|
1288 if (curCharset == prevCharset && curEncoding == prevEncoding) { |
|
1289 encodedText.Append(q + 2, R - (q + 2)); |
|
1290 bDecoded = true; |
|
1291 } |
|
1292 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
|
1293 prevEncoding, prevCharset.get(), aResult); |
|
1294 if (NS_FAILED(rv)) { |
|
1295 aResult.Append(encodedText); |
|
1296 } |
|
1297 encodedText.Truncate(); |
|
1298 prevCharset.Truncate(); |
|
1299 prevEncoding = '\0'; |
|
1300 } |
|
1301 if (!bDecoded) { |
|
1302 rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, |
|
1303 curCharset.get(), aResult); |
|
1304 if (NS_FAILED(rv)) { |
|
1305 aResult.Append(encodedText); |
|
1306 } |
|
1307 } |
|
1308 |
|
1309 begin = r + 2; |
|
1310 isLastEncodedWord = 1; |
|
1311 continue; |
|
1312 |
|
1313 badsyntax: |
|
1314 if (!encodedText.IsEmpty()) { |
|
1315 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
|
1316 prevEncoding, prevCharset.get(), aResult); |
|
1317 if (NS_FAILED(rv)) { |
|
1318 aResult.Append(encodedText); |
|
1319 } |
|
1320 encodedText.Truncate(); |
|
1321 prevCharset.Truncate(); |
|
1322 } |
|
1323 // copy the part before the encoded-word |
|
1324 aResult.Append(begin, p - begin); |
|
1325 begin = p; |
|
1326 isLastEncodedWord = 0; |
|
1327 } |
|
1328 |
|
1329 if (!encodedText.IsEmpty()) { |
|
1330 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), |
|
1331 prevEncoding, prevCharset.get(), aResult); |
|
1332 if (NS_FAILED(rv)) { |
|
1333 aResult.Append(encodedText); |
|
1334 } |
|
1335 } |
|
1336 |
|
1337 // put the tail back |
|
1338 CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult); |
|
1339 |
|
1340 nsAutoCString tempStr(aResult); |
|
1341 tempStr.ReplaceChar('\t', ' '); |
|
1342 aResult = tempStr; |
|
1343 |
|
1344 return NS_OK; |
|
1345 } |