1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/netwerk/mime/nsMIMEHeaderParamImpl.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1345 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* vim: set sw=4 ts=8 et tw=80 : */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include <string.h> 1.11 +#include "prmem.h" 1.12 +#include "prprf.h" 1.13 +#include "plstr.h" 1.14 +#include "plbase64.h" 1.15 +#include "nsCRT.h" 1.16 +#include "nsMemory.h" 1.17 +#include "nsTArray.h" 1.18 +#include "nsCOMPtr.h" 1.19 +#include "nsEscape.h" 1.20 +#include "nsIUTF8ConverterService.h" 1.21 +#include "nsUConvCID.h" 1.22 +#include "nsIServiceManager.h" 1.23 +#include "nsMIMEHeaderParamImpl.h" 1.24 +#include "nsReadableUtils.h" 1.25 +#include "nsNativeCharsetUtils.h" 1.26 +#include "nsError.h" 1.27 +#include "nsIUnicodeDecoder.h" 1.28 +#include "mozilla/dom/EncodingUtils.h" 1.29 + 1.30 +using mozilla::dom::EncodingUtils; 1.31 + 1.32 +// static functions declared below are moved from mailnews/mime/src/comi18n.cpp 1.33 + 1.34 +static char *DecodeQ(const char *, uint32_t); 1.35 +static bool Is7bitNonAsciiString(const char *, uint32_t); 1.36 +static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &); 1.37 +static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&); 1.38 +static nsresult internalDecodeParameter(const nsACString&, const char*, 1.39 + const char*, bool, bool, nsACString&); 1.40 + 1.41 +// XXX The chance of UTF-7 being used in the message header is really 1.42 +// low, but in theory it's possible. 1.43 +#define IS_7BIT_NON_ASCII_CHARSET(cset) \ 1.44 + (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \ 1.45 + !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \ 1.46 + !nsCRT::strncasecmp((cset), "UTF-7", 5)) 1.47 + 1.48 +NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam) 1.49 + 1.50 +NS_IMETHODIMP 1.51 +nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, 1.52 + const char *aParamName, 1.53 + const nsACString& aFallbackCharset, 1.54 + bool aTryLocaleCharset, 1.55 + char **aLang, nsAString& aResult) 1.56 +{ 1.57 + return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING, 1.58 + aFallbackCharset, aTryLocaleCharset, aLang, aResult); 1.59 +} 1.60 + 1.61 +NS_IMETHODIMP 1.62 +nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, 1.63 + const char *aParamName, 1.64 + const nsACString& aFallbackCharset, 1.65 + bool aTryLocaleCharset, 1.66 + char **aLang, nsAString& aResult) 1.67 +{ 1.68 + return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING, 1.69 + aFallbackCharset, aTryLocaleCharset, aLang, aResult); 1.70 +} 1.71 + 1.72 +// XXX : aTryLocaleCharset is not yet effective. 1.73 +nsresult 1.74 +nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, 1.75 + const char *aParamName, 1.76 + ParamDecoding aDecoding, 1.77 + const nsACString& aFallbackCharset, 1.78 + bool aTryLocaleCharset, 1.79 + char **aLang, nsAString& aResult) 1.80 +{ 1.81 + aResult.Truncate(); 1.82 + nsresult rv; 1.83 + 1.84 + // get parameter (decode RFC 2231/5987 when applicable, as specified by 1.85 + // aDecoding (5987 being a subset of 2231) and return charset.) 1.86 + nsXPIDLCString med; 1.87 + nsXPIDLCString charset; 1.88 + rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, 1.89 + aDecoding, getter_Copies(charset), aLang, 1.90 + getter_Copies(med)); 1.91 + if (NS_FAILED(rv)) 1.92 + return rv; 1.93 + 1.94 + // convert to UTF-8 after charset conversion and RFC 2047 decoding 1.95 + // if necessary. 1.96 + 1.97 + nsAutoCString str1; 1.98 + rv = internalDecodeParameter(med, charset.get(), nullptr, false, 1.99 + // was aDecoding == MIME_FIELD_ENCODING 1.100 + // see bug 875615 1.101 + true, 1.102 + str1); 1.103 + NS_ENSURE_SUCCESS(rv, rv); 1.104 + 1.105 + if (!aFallbackCharset.IsEmpty()) 1.106 + { 1.107 + nsAutoCString charset; 1.108 + EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset); 1.109 + nsAutoCString str2; 1.110 + nsCOMPtr<nsIUTF8ConverterService> 1.111 + cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); 1.112 + if (cvtUTF8 && 1.113 + NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, 1.114 + PromiseFlatCString(aFallbackCharset).get(), false, 1.115 + !charset.EqualsLiteral("UTF-8"), 1.116 + 1, str2))) { 1.117 + CopyUTF8toUTF16(str2, aResult); 1.118 + return NS_OK; 1.119 + } 1.120 + } 1.121 + 1.122 + if (IsUTF8(str1)) { 1.123 + CopyUTF8toUTF16(str1, aResult); 1.124 + return NS_OK; 1.125 + } 1.126 + 1.127 + if (aTryLocaleCharset && !NS_IsNativeUTF8()) 1.128 + return NS_CopyNativeToUnicode(str1, aResult); 1.129 + 1.130 + CopyASCIItoUTF16(str1, aResult); 1.131 + return NS_OK; 1.132 +} 1.133 + 1.134 +// remove backslash-encoded sequences from quoted-strings 1.135 +// modifies string in place, potentially shortening it 1.136 +void RemoveQuotedStringEscapes(char *src) 1.137 +{ 1.138 + char *dst = src; 1.139 + 1.140 + for (char *c = src; *c; ++c) 1.141 + { 1.142 + if (c[0] == '\\' && c[1]) 1.143 + { 1.144 + // skip backslash if not at end 1.145 + ++c; 1.146 + } 1.147 + *dst++ = *c; 1.148 + } 1.149 + *dst = 0; 1.150 +} 1.151 + 1.152 +// true is character is a hex digit 1.153 +bool IsHexDigit(char aChar) 1.154 +{ 1.155 + char c = aChar; 1.156 + 1.157 + return (c >= 'a' && c <= 'f') || 1.158 + (c >= 'A' && c <= 'F') || 1.159 + (c >= '0' && c <= '9'); 1.160 +} 1.161 + 1.162 +// validate that a C String containing %-escapes is syntactically valid 1.163 +bool IsValidPercentEscaped(const char *aValue, int32_t len) 1.164 +{ 1.165 + for (int32_t i = 0; i < len; i++) { 1.166 + if (aValue[i] == '%') { 1.167 + if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) { 1.168 + return false; 1.169 + } 1.170 + } 1.171 + } 1.172 + return true; 1.173 +} 1.174 + 1.175 +// Support for continuations (RFC 2231, Section 3) 1.176 + 1.177 +// only a sane number supported 1.178 +#define MAX_CONTINUATIONS 999 1.179 + 1.180 +// part of a continuation 1.181 + 1.182 +class Continuation { 1.183 + public: 1.184 + Continuation(const char *aValue, uint32_t aLength, 1.185 + bool aNeedsPercentDecoding, bool aWasQuotedString) { 1.186 + value = aValue; 1.187 + length = aLength; 1.188 + needsPercentDecoding = aNeedsPercentDecoding; 1.189 + wasQuotedString = aWasQuotedString; 1.190 + } 1.191 + Continuation() { 1.192 + // empty constructor needed for nsTArray 1.193 + value = 0L; 1.194 + length = 0; 1.195 + needsPercentDecoding = false; 1.196 + wasQuotedString = false; 1.197 + } 1.198 + ~Continuation() {} 1.199 + 1.200 + const char *value; 1.201 + uint32_t length; 1.202 + bool needsPercentDecoding; 1.203 + bool wasQuotedString; 1.204 +}; 1.205 + 1.206 +// combine segments into a single string, returning the allocated string 1.207 +// (or nullptr) while emptying the list 1.208 +char *combineContinuations(nsTArray<Continuation>& aArray) 1.209 +{ 1.210 + // Sanity check 1.211 + if (aArray.Length() == 0) 1.212 + return nullptr; 1.213 + 1.214 + // Get an upper bound for the length 1.215 + uint32_t length = 0; 1.216 + for (uint32_t i = 0; i < aArray.Length(); i++) { 1.217 + length += aArray[i].length; 1.218 + } 1.219 + 1.220 + // Allocate 1.221 + char *result = (char *) nsMemory::Alloc(length + 1); 1.222 + 1.223 + // Concatenate 1.224 + if (result) { 1.225 + *result = '\0'; 1.226 + 1.227 + for (uint32_t i = 0; i < aArray.Length(); i++) { 1.228 + Continuation cont = aArray[i]; 1.229 + if (! cont.value) break; 1.230 + 1.231 + char *c = result + strlen(result); 1.232 + strncat(result, cont.value, cont.length); 1.233 + if (cont.needsPercentDecoding) { 1.234 + nsUnescape(c); 1.235 + } 1.236 + if (cont.wasQuotedString) { 1.237 + RemoveQuotedStringEscapes(c); 1.238 + } 1.239 + } 1.240 + 1.241 + // return null if empty value 1.242 + if (*result == '\0') { 1.243 + nsMemory::Free(result); 1.244 + result = nullptr; 1.245 + } 1.246 + } else { 1.247 + // Handle OOM 1.248 + NS_WARNING("Out of memory\n"); 1.249 + } 1.250 + 1.251 + return result; 1.252 +} 1.253 + 1.254 +// add a continuation, return false on error if segment already has been seen 1.255 +bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex, 1.256 + const char *aValue, uint32_t aLength, 1.257 + bool aNeedsPercentDecoding, bool aWasQuotedString) 1.258 +{ 1.259 + if (aIndex < aArray.Length() && aArray[aIndex].value) { 1.260 + NS_WARNING("duplicate RC2231 continuation segment #\n"); 1.261 + return false; 1.262 + } 1.263 + 1.264 + if (aIndex > MAX_CONTINUATIONS) { 1.265 + NS_WARNING("RC2231 continuation segment # exceeds limit\n"); 1.266 + return false; 1.267 + } 1.268 + 1.269 + if (aNeedsPercentDecoding && aWasQuotedString) { 1.270 + NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n"); 1.271 + return false; 1.272 + } 1.273 + 1.274 + Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString); 1.275 + 1.276 + if (aArray.Length() <= aIndex) { 1.277 + aArray.SetLength(aIndex + 1); 1.278 + } 1.279 + aArray[aIndex] = cont; 1.280 + 1.281 + return true; 1.282 +} 1.283 + 1.284 +// parse a segment number; return -1 on error 1.285 +int32_t parseSegmentNumber(const char *aValue, int32_t aLen) 1.286 +{ 1.287 + if (aLen < 1) { 1.288 + NS_WARNING("segment number missing\n"); 1.289 + return -1; 1.290 + } 1.291 + 1.292 + if (aLen > 1 && aValue[0] == '0') { 1.293 + NS_WARNING("leading '0' not allowed in segment number\n"); 1.294 + return -1; 1.295 + } 1.296 + 1.297 + int32_t segmentNumber = 0; 1.298 + 1.299 + for (int32_t i = 0; i < aLen; i++) { 1.300 + if (! (aValue[i] >= '0' && aValue[i] <= '9')) { 1.301 + NS_WARNING("invalid characters in segment number\n"); 1.302 + return -1; 1.303 + } 1.304 + 1.305 + segmentNumber *= 10; 1.306 + segmentNumber += aValue[i] - '0'; 1.307 + if (segmentNumber > MAX_CONTINUATIONS) { 1.308 + NS_WARNING("Segment number exceeds sane size\n"); 1.309 + return -1; 1.310 + } 1.311 + } 1.312 + 1.313 + return segmentNumber; 1.314 +} 1.315 + 1.316 +// validate a given octet sequence for compliance with the specified 1.317 +// encoding 1.318 +bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets) 1.319 +{ 1.320 + nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService 1.321 + (NS_UTF8CONVERTERSERVICE_CONTRACTID)); 1.322 + if (!cvtUTF8) { 1.323 + NS_WARNING("Can't get UTF8ConverterService\n"); 1.324 + return false; 1.325 + } 1.326 + 1.327 + nsAutoCString tmpRaw; 1.328 + tmpRaw.Assign(aOctets); 1.329 + nsAutoCString tmpDecoded; 1.330 + 1.331 + nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw, 1.332 + PromiseFlatCString(aCharset).get(), 1.333 + false, false, 1, tmpDecoded); 1.334 + 1.335 + if (rv != NS_OK) { 1.336 + // we can't decode; charset may be unsupported, or the octet sequence 1.337 + // is broken (illegal or incomplete octet sequence contained) 1.338 + NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n"); 1.339 + return false; 1.340 + } 1.341 + 1.342 + return true; 1.343 +} 1.344 + 1.345 +// moved almost verbatim from mimehdrs.cpp 1.346 +// char * 1.347 +// MimeHeaders_get_parameter (const char *header_value, const char *parm_name, 1.348 +// char **charset, char **language) 1.349 +// 1.350 +// The format of these header lines is 1.351 +// <token> [ ';' <token> '=' <token-or-quoted-string> ]* 1.352 +NS_IMETHODIMP 1.353 +nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, 1.354 + const char *aParamName, 1.355 + char **aCharset, 1.356 + char **aLang, 1.357 + char **aResult) 1.358 +{ 1.359 + return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING, 1.360 + aCharset, aLang, aResult); 1.361 +} 1.362 + 1.363 + 1.364 +nsresult 1.365 +nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, 1.366 + const char *aParamName, 1.367 + ParamDecoding aDecoding, 1.368 + char **aCharset, 1.369 + char **aLang, 1.370 + char **aResult) 1.371 +{ 1.372 + 1.373 + if (!aHeaderValue || !*aHeaderValue || !aResult) 1.374 + return NS_ERROR_INVALID_ARG; 1.375 + 1.376 + *aResult = nullptr; 1.377 + 1.378 + if (aCharset) *aCharset = nullptr; 1.379 + if (aLang) *aLang = nullptr; 1.380 + 1.381 + nsAutoCString charset; 1.382 + 1.383 + // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable 1.384 + // them for HTTP header fields later on, see bug 776324 1.385 + bool acceptContinuations = true; 1.386 + 1.387 + const char *str = aHeaderValue; 1.388 + 1.389 + // skip leading white space. 1.390 + for (; *str && nsCRT::IsAsciiSpace(*str); ++str) 1.391 + ; 1.392 + const char *start = str; 1.393 + 1.394 + // aParamName is empty. return the first (possibly) _unnamed_ 'parameter' 1.395 + // For instance, return 'inline' in the following case: 1.396 + // Content-Disposition: inline; filename=..... 1.397 + if (!aParamName || !*aParamName) 1.398 + { 1.399 + for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str) 1.400 + ; 1.401 + if (str == start) 1.402 + return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY; 1.403 + 1.404 + *aResult = (char *) nsMemory::Clone(start, (str - start) + 1); 1.405 + NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY); 1.406 + (*aResult)[str - start] = '\0'; // null-terminate 1.407 + return NS_OK; 1.408 + } 1.409 + 1.410 + /* Skip forward to first ';' */ 1.411 + for (; *str && *str != ';' && *str != ','; ++str) 1.412 + ; 1.413 + if (*str) 1.414 + str++; 1.415 + /* Skip over following whitespace */ 1.416 + for (; *str && nsCRT::IsAsciiSpace(*str); ++str) 1.417 + ; 1.418 + 1.419 + // Some broken http servers just specify parameters 1.420 + // like 'filename' without specifying disposition 1.421 + // method. Rewind to the first non-white-space 1.422 + // character. 1.423 + 1.424 + if (!*str) 1.425 + str = start; 1.426 + 1.427 + // RFC2231 - The legitimate parm format can be: 1.428 + // A. title=ThisIsTitle 1.429 + // B. title*=us-ascii'en-us'This%20is%20wierd. 1.430 + // C. title*0*=us-ascii'en'This%20is%20wierd.%20We 1.431 + // title*1*=have%20to%20support%20this. 1.432 + // title*2="Else..." 1.433 + // D. title*0="Hey, what you think you are doing?" 1.434 + // title*1="There is no charset and lang info." 1.435 + // RFC5987: only A and B 1.436 + 1.437 + // collect results for the different algorithms (plain filename, 1.438 + // RFC5987/2231-encoded filename, + continuations) separately and decide 1.439 + // which to use at the end 1.440 + char *caseAResult = nullptr; 1.441 + char *caseBResult = nullptr; 1.442 + char *caseCDResult = nullptr; 1.443 + 1.444 + // collect continuation segments 1.445 + nsTArray<Continuation> segments; 1.446 + 1.447 + 1.448 + // our copies of the charset parameter, kept separately as they might 1.449 + // differ for the two formats 1.450 + nsDependentCSubstring charsetB, charsetCD; 1.451 + 1.452 + nsDependentCSubstring lang; 1.453 + 1.454 + int32_t paramLen = strlen(aParamName); 1.455 + 1.456 + while (*str) { 1.457 + // find name/value 1.458 + 1.459 + const char *nameStart = str; 1.460 + const char *nameEnd = nullptr; 1.461 + const char *valueStart = str; 1.462 + const char *valueEnd = nullptr; 1.463 + bool isQuotedString = false; 1.464 + 1.465 + NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace."); 1.466 + 1.467 + // Skip forward to the end of this token. 1.468 + for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++) 1.469 + ; 1.470 + nameEnd = str; 1.471 + 1.472 + int32_t nameLen = nameEnd - nameStart; 1.473 + 1.474 + // Skip over whitespace, '=', and whitespace 1.475 + while (nsCRT::IsAsciiSpace(*str)) ++str; 1.476 + if (!*str) { 1.477 + break; 1.478 + } 1.479 + if (*str++ != '=') { 1.480 + // don't accept parameters without "=" 1.481 + goto increment_str; 1.482 + } 1.483 + while (nsCRT::IsAsciiSpace(*str)) ++str; 1.484 + 1.485 + if (*str != '"') { 1.486 + // The value is a token, not a quoted string. 1.487 + valueStart = str; 1.488 + for (valueEnd = str; 1.489 + *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';'; 1.490 + valueEnd++) 1.491 + ; 1.492 + str = valueEnd; 1.493 + } else { 1.494 + isQuotedString = true; 1.495 + 1.496 + ++str; 1.497 + valueStart = str; 1.498 + for (valueEnd = str; *valueEnd; ++valueEnd) { 1.499 + if (*valueEnd == '\\' && *(valueEnd + 1)) 1.500 + ++valueEnd; 1.501 + else if (*valueEnd == '"') 1.502 + break; 1.503 + } 1.504 + str = valueEnd; 1.505 + // *valueEnd != null means that *valueEnd is quote character. 1.506 + if (*valueEnd) 1.507 + str++; 1.508 + } 1.509 + 1.510 + // See if this is the simplest case (case A above), 1.511 + // a 'single' line value with no charset and lang. 1.512 + // If so, copy it and return. 1.513 + if (nameLen == paramLen && 1.514 + !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) { 1.515 + 1.516 + if (caseAResult) { 1.517 + // we already have one caseA result, ignore subsequent ones 1.518 + goto increment_str; 1.519 + } 1.520 + 1.521 + // if the parameter spans across multiple lines we have to strip out the 1.522 + // line continuation -- jht 4/29/98 1.523 + nsAutoCString tempStr(valueStart, valueEnd - valueStart); 1.524 + tempStr.StripChars("\r\n"); 1.525 + char *res = ToNewCString(tempStr); 1.526 + NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY); 1.527 + 1.528 + if (isQuotedString) 1.529 + RemoveQuotedStringEscapes(res); 1.530 + 1.531 + caseAResult = res; 1.532 + // keep going, we may find a RFC 2231/5987 encoded alternative 1.533 + } 1.534 + // case B, C, and D 1.535 + else if (nameLen > paramLen && 1.536 + !nsCRT::strncasecmp(nameStart, aParamName, paramLen) && 1.537 + *(nameStart + paramLen) == '*') { 1.538 + 1.539 + // 1st char past '*' 1.540 + const char *cp = nameStart + paramLen + 1; 1.541 + 1.542 + // if param name ends in "*" we need do to RFC5987 "ext-value" decoding 1.543 + bool needExtDecoding = *(nameEnd - 1) == '*'; 1.544 + 1.545 + bool caseB = nameLen == paramLen + 1; 1.546 + bool caseCStart = (*cp == '0') && needExtDecoding; 1.547 + 1.548 + // parse the segment number 1.549 + int32_t segmentNumber = -1; 1.550 + if (!caseB) { 1.551 + int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0); 1.552 + segmentNumber = parseSegmentNumber(cp, segLen); 1.553 + 1.554 + if (segmentNumber == -1) { 1.555 + acceptContinuations = false; 1.556 + goto increment_str; 1.557 + } 1.558 + } 1.559 + 1.560 + // CaseB and start of CaseC: requires charset and optional language 1.561 + // in quotes (quotes required even if lang is blank) 1.562 + if (caseB || (caseCStart && acceptContinuations)) { 1.563 + // look for single quotation mark(') 1.564 + const char *sQuote1 = PL_strchr(valueStart, 0x27); 1.565 + const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr; 1.566 + 1.567 + // Two single quotation marks must be present even in 1.568 + // absence of charset and lang. 1.569 + if (!sQuote1 || !sQuote2) { 1.570 + NS_WARNING("Mandatory two single quotes are missing in header parameter\n"); 1.571 + } 1.572 + 1.573 + const char *charsetStart = nullptr; 1.574 + int32_t charsetLength = 0; 1.575 + const char *langStart = nullptr; 1.576 + int32_t langLength = 0; 1.577 + const char *rawValStart = nullptr; 1.578 + int32_t rawValLength = 0; 1.579 + 1.580 + if (sQuote2 && sQuote1) { 1.581 + // both delimiters present: charSet'lang'rawVal 1.582 + rawValStart = sQuote2 + 1; 1.583 + rawValLength = valueEnd - rawValStart; 1.584 + 1.585 + langStart = sQuote1 + 1; 1.586 + langLength = sQuote2 - langStart; 1.587 + 1.588 + charsetStart = valueStart; 1.589 + charsetLength = sQuote1 - charsetStart; 1.590 + } 1.591 + else if (sQuote1) { 1.592 + // one delimiter; assume charset'rawVal 1.593 + rawValStart = sQuote1 + 1; 1.594 + rawValLength = valueEnd - rawValStart; 1.595 + 1.596 + charsetStart = valueStart; 1.597 + charsetLength = sQuote1 - valueStart; 1.598 + } 1.599 + else { 1.600 + // no delimiter: just rawVal 1.601 + rawValStart = valueStart; 1.602 + rawValLength = valueEnd - valueStart; 1.603 + } 1.604 + 1.605 + if (langLength != 0) { 1.606 + lang.Assign(langStart, langLength); 1.607 + } 1.608 + 1.609 + // keep the charset for later 1.610 + if (caseB) { 1.611 + charsetB.Assign(charsetStart, charsetLength); 1.612 + } else { 1.613 + // if caseCorD 1.614 + charsetCD.Assign(charsetStart, charsetLength); 1.615 + } 1.616 + 1.617 + // non-empty value part 1.618 + if (rawValLength > 0) { 1.619 + if (!caseBResult && caseB) { 1.620 + if (!IsValidPercentEscaped(rawValStart, rawValLength)) { 1.621 + goto increment_str; 1.622 + } 1.623 + 1.624 + // allocate buffer for the raw value 1.625 + char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1); 1.626 + if (!tmpResult) { 1.627 + goto increment_str; 1.628 + } 1.629 + *(tmpResult + rawValLength) = 0; 1.630 + 1.631 + nsUnescape(tmpResult); 1.632 + caseBResult = tmpResult; 1.633 + } else { 1.634 + // caseC 1.635 + bool added = addContinuation(segments, 0, rawValStart, 1.636 + rawValLength, needExtDecoding, 1.637 + isQuotedString); 1.638 + 1.639 + if (!added) { 1.640 + // continuation not added, stop processing them 1.641 + acceptContinuations = false; 1.642 + } 1.643 + } 1.644 + } 1.645 + } // end of if-block : title*0*= or title*= 1.646 + // caseD: a line of multiline param with no need for unescaping : title*[0-9]= 1.647 + // or 2nd or later lines of a caseC param : title*[1-9]*= 1.648 + else if (acceptContinuations && segmentNumber != -1) { 1.649 + uint32_t valueLength = valueEnd - valueStart; 1.650 + 1.651 + bool added = addContinuation(segments, segmentNumber, valueStart, 1.652 + valueLength, needExtDecoding, 1.653 + isQuotedString); 1.654 + 1.655 + if (!added) { 1.656 + // continuation not added, stop processing them 1.657 + acceptContinuations = false; 1.658 + } 1.659 + } // end of if-block : title*[0-9]= or title*[1-9]*= 1.660 + } 1.661 + 1.662 + // str now points after the end of the value. 1.663 + // skip over whitespace, ';', whitespace. 1.664 +increment_str: 1.665 + while (nsCRT::IsAsciiSpace(*str)) ++str; 1.666 + if (*str == ';') { 1.667 + ++str; 1.668 + } else { 1.669 + // stop processing the header field; either we are done or the 1.670 + // separator was missing 1.671 + break; 1.672 + } 1.673 + while (nsCRT::IsAsciiSpace(*str)) ++str; 1.674 + } 1.675 + 1.676 + caseCDResult = combineContinuations(segments); 1.677 + 1.678 + if (caseBResult && !charsetB.IsEmpty()) { 1.679 + // check that the 2231/5987 result decodes properly given the 1.680 + // specified character set 1.681 + if (!IsValidOctetSequenceForCharset(charsetB, caseBResult)) 1.682 + caseBResult = nullptr; 1.683 + } 1.684 + 1.685 + if (caseCDResult && !charsetCD.IsEmpty()) { 1.686 + // check that the 2231/5987 result decodes properly given the 1.687 + // specified character set 1.688 + if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult)) 1.689 + caseCDResult = nullptr; 1.690 + } 1.691 + 1.692 + if (caseBResult) { 1.693 + // prefer simple 5987 format over 2231 with continuations 1.694 + *aResult = caseBResult; 1.695 + caseBResult = nullptr; 1.696 + charset.Assign(charsetB); 1.697 + } 1.698 + else if (caseCDResult) { 1.699 + // prefer 2231/5987 with or without continuations over plain format 1.700 + *aResult = caseCDResult; 1.701 + caseCDResult = nullptr; 1.702 + charset.Assign(charsetCD); 1.703 + } 1.704 + else if (caseAResult) { 1.705 + *aResult = caseAResult; 1.706 + caseAResult = nullptr; 1.707 + } 1.708 + 1.709 + // free unused stuff 1.710 + nsMemory::Free(caseAResult); 1.711 + nsMemory::Free(caseBResult); 1.712 + nsMemory::Free(caseCDResult); 1.713 + 1.714 + // if we have a result 1.715 + if (*aResult) { 1.716 + // then return charset and lang as well 1.717 + if (aLang && !lang.IsEmpty()) { 1.718 + uint32_t len = lang.Length(); 1.719 + *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1); 1.720 + if (*aLang) { 1.721 + *(*aLang + len) = 0; 1.722 + } 1.723 + } 1.724 + if (aCharset && !charset.IsEmpty()) { 1.725 + uint32_t len = charset.Length(); 1.726 + *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1); 1.727 + if (*aCharset) { 1.728 + *(*aCharset + len) = 0; 1.729 + } 1.730 + } 1.731 + } 1.732 + 1.733 + return *aResult ? NS_OK : NS_ERROR_INVALID_ARG; 1.734 +} 1.735 + 1.736 +nsresult 1.737 +internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset, 1.738 + bool aOverrideCharset, bool aEatContinuations, 1.739 + nsACString& aResult) 1.740 +{ 1.741 + aResult.Truncate(); 1.742 + if (!aHeaderVal) 1.743 + return NS_ERROR_INVALID_ARG; 1.744 + if (!*aHeaderVal) 1.745 + return NS_OK; 1.746 + 1.747 + 1.748 + // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but 1.749 + // aDefaultCharset is specified, decodes RFC 2047 encoding and converts 1.750 + // to UTF-8. Otherwise, just strips away CRLF. 1.751 + if (PL_strstr(aHeaderVal, "=?") || 1.752 + (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || 1.753 + Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) { 1.754 + DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult); 1.755 + } else if (aEatContinuations && 1.756 + (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) { 1.757 + aResult = aHeaderVal; 1.758 + } else { 1.759 + aEatContinuations = false; 1.760 + aResult = aHeaderVal; 1.761 + } 1.762 + 1.763 + if (aEatContinuations) { 1.764 + nsAutoCString temp(aResult); 1.765 + temp.ReplaceSubstring("\n\t", " "); 1.766 + temp.ReplaceSubstring("\r\t", " "); 1.767 + temp.StripChars("\r\n"); 1.768 + aResult = temp; 1.769 + } 1.770 + 1.771 + return NS_OK; 1.772 +} 1.773 + 1.774 +NS_IMETHODIMP 1.775 +nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, 1.776 + const char* aDefaultCharset, 1.777 + bool aOverrideCharset, 1.778 + bool aEatContinuations, 1.779 + nsACString& aResult) 1.780 +{ 1.781 + return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset, 1.782 + aOverrideCharset, aEatContinuations, 1.783 + aResult); 1.784 +} 1.785 + 1.786 +// true if the character is allowed in a RFC 5987 value 1.787 +// see RFC 5987, Section 3.2.1, "attr-char" 1.788 +bool IsRFC5987AttrChar(char aChar) 1.789 +{ 1.790 + char c = aChar; 1.791 + 1.792 + return (c >= 'a' && c <= 'z') || 1.793 + (c >= 'A' && c <= 'Z') || 1.794 + (c >= '0' && c <= '9') || 1.795 + (c == '!' || c == '#' || c == '$' || c == '&' || 1.796 + c == '+' || c == '-' || c == '.' || c == '^' || 1.797 + c == '_' || c == '`' || c == '|' || c == '~'); 1.798 +} 1.799 + 1.800 +// percent-decode a value 1.801 +// returns false on failure 1.802 +bool PercentDecode(nsACString& aValue) 1.803 +{ 1.804 + char *c = (char *) nsMemory::Alloc(aValue.Length() + 1); 1.805 + if (!c) { 1.806 + return false; 1.807 + } 1.808 + 1.809 + strcpy(c, PromiseFlatCString(aValue).get()); 1.810 + nsUnescape(c); 1.811 + aValue.Assign(c); 1.812 + nsMemory::Free(c); 1.813 + 1.814 + return true; 1.815 +} 1.816 + 1.817 +// Decode a parameter value using the encoding defined in RFC 5987 1.818 +// 1.819 +// charset "'" [ language ] "'" value-chars 1.820 +NS_IMETHODIMP 1.821 +nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal, 1.822 + nsACString& aLang, 1.823 + nsAString& aResult) 1.824 +{ 1.825 + nsAutoCString charset; 1.826 + nsAutoCString language; 1.827 + nsAutoCString value; 1.828 + 1.829 + uint32_t delimiters = 0; 1.830 + const char *encoded = PromiseFlatCString(aParamVal).get(); 1.831 + const char *c = encoded; 1.832 + 1.833 + while (*c) { 1.834 + char tc = *c++; 1.835 + 1.836 + if (tc == '\'') { 1.837 + // single quote 1.838 + delimiters++; 1.839 + } else if (((unsigned char)tc) >= 128) { 1.840 + // fail early, not ASCII 1.841 + NS_WARNING("non-US-ASCII character in RFC5987-encoded param"); 1.842 + return NS_ERROR_INVALID_ARG; 1.843 + } else { 1.844 + if (delimiters == 0) { 1.845 + // valid characters are checked later implicitly 1.846 + charset.Append(tc); 1.847 + } else if (delimiters == 1) { 1.848 + // no value checking for now 1.849 + language.Append(tc); 1.850 + } else if (delimiters == 2) { 1.851 + if (IsRFC5987AttrChar(tc)) { 1.852 + value.Append(tc); 1.853 + } else if (tc == '%') { 1.854 + if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) { 1.855 + // we expect two more characters 1.856 + NS_WARNING("broken %-escape in RFC5987-encoded param"); 1.857 + return NS_ERROR_INVALID_ARG; 1.858 + } 1.859 + value.Append(tc); 1.860 + // we consume two more 1.861 + value.Append(*c++); 1.862 + value.Append(*c++); 1.863 + } else { 1.864 + // character not allowed here 1.865 + NS_WARNING("invalid character in RFC5987-encoded param"); 1.866 + return NS_ERROR_INVALID_ARG; 1.867 + } 1.868 + } 1.869 + } 1.870 + } 1.871 + 1.872 + if (delimiters != 2) { 1.873 + NS_WARNING("missing delimiters in RFC5987-encoded param"); 1.874 + return NS_ERROR_INVALID_ARG; 1.875 + } 1.876 + 1.877 + // abort early for unsupported encodings 1.878 + if (!charset.LowerCaseEqualsLiteral("utf-8")) { 1.879 + NS_WARNING("unsupported charset in RFC5987-encoded param"); 1.880 + return NS_ERROR_INVALID_ARG; 1.881 + } 1.882 + 1.883 + // percent-decode 1.884 + if (!PercentDecode(value)) { 1.885 + return NS_ERROR_OUT_OF_MEMORY; 1.886 + } 1.887 + 1.888 + // return the encoding 1.889 + aLang.Assign(language); 1.890 + 1.891 + // finally convert octet sequence to UTF-8 and be done 1.892 + nsresult rv = NS_OK; 1.893 + nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 = 1.894 + do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv); 1.895 + NS_ENSURE_SUCCESS(rv, rv); 1.896 + 1.897 + nsAutoCString utf8; 1.898 + rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8); 1.899 + NS_ENSURE_SUCCESS(rv, rv); 1.900 + 1.901 + CopyUTF8toUTF16(utf8, aResult); 1.902 + return NS_OK; 1.903 +} 1.904 + 1.905 +nsresult 1.906 +internalDecodeParameter(const nsACString& aParamValue, const char* aCharset, 1.907 + const char* aDefaultCharset, bool aOverrideCharset, 1.908 + bool aDecode2047, nsACString& aResult) 1.909 +{ 1.910 + aResult.Truncate(); 1.911 + // If aCharset is given, aParamValue was obtained from RFC2231/5987 1.912 + // encoding and we're pretty sure that it's in aCharset. 1.913 + if (aCharset && *aCharset) 1.914 + { 1.915 + nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); 1.916 + if (cvtUTF8) 1.917 + return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset, 1.918 + true, true, 1, aResult); 1.919 + } 1.920 + 1.921 + const nsAFlatCString& param = PromiseFlatCString(aParamValue); 1.922 + nsAutoCString unQuoted; 1.923 + nsACString::const_iterator s, e; 1.924 + param.BeginReading(s); 1.925 + param.EndReading(e); 1.926 + 1.927 + // strip '\' when used to quote CR, LF, '"' and '\' 1.928 + for ( ; s != e; ++s) { 1.929 + if ((*s == '\\')) { 1.930 + if (++s == e) { 1.931 + --s; // '\' is at the end. move back and append '\'. 1.932 + } 1.933 + else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') { 1.934 + --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\' 1.935 + } 1.936 + // else : skip '\' and append the quoted character. 1.937 + } 1.938 + unQuoted.Append(*s); 1.939 + } 1.940 + 1.941 + aResult = unQuoted; 1.942 + nsresult rv = NS_OK; 1.943 + 1.944 + if (aDecode2047) { 1.945 + nsAutoCString decoded; 1.946 + 1.947 + // Try RFC 2047 encoding, instead. 1.948 + rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset, 1.949 + aOverrideCharset, true, decoded); 1.950 + 1.951 + if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) 1.952 + aResult = decoded; 1.953 + } 1.954 + 1.955 + return rv; 1.956 +} 1.957 + 1.958 +NS_IMETHODIMP 1.959 +nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue, 1.960 + const char* aCharset, 1.961 + const char* aDefaultCharset, 1.962 + bool aOverrideCharset, 1.963 + nsACString& aResult) 1.964 +{ 1.965 + return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset, 1.966 + aOverrideCharset, true, aResult); 1.967 +} 1.968 + 1.969 +#define ISHEXCHAR(c) \ 1.970 + ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \ 1.971 + (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \ 1.972 + (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66)) 1.973 + 1.974 +// Decode Q encoding (RFC 2047). 1.975 +// static 1.976 +char *DecodeQ(const char *in, uint32_t length) 1.977 +{ 1.978 + char *out, *dest = 0; 1.979 + 1.980 + out = dest = (char *)PR_Calloc(length + 1, sizeof(char)); 1.981 + if (dest == nullptr) 1.982 + return nullptr; 1.983 + while (length > 0) { 1.984 + unsigned c = 0; 1.985 + switch (*in) { 1.986 + case '=': 1.987 + // check if |in| in the form of '=hh' where h is [0-9a-fA-F]. 1.988 + if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) 1.989 + goto badsyntax; 1.990 + PR_sscanf(in + 1, "%2X", &c); 1.991 + *out++ = (char) c; 1.992 + in += 3; 1.993 + length -= 3; 1.994 + break; 1.995 + 1.996 + case '_': 1.997 + *out++ = ' '; 1.998 + in++; 1.999 + length--; 1.1000 + break; 1.1001 + 1.1002 + default: 1.1003 + if (*in & 0x80) goto badsyntax; 1.1004 + *out++ = *in++; 1.1005 + length--; 1.1006 + } 1.1007 + } 1.1008 + *out++ = '\0'; 1.1009 + 1.1010 + for (out = dest; *out ; ++out) { 1.1011 + if (*out == '\t') 1.1012 + *out = ' '; 1.1013 + } 1.1014 + 1.1015 + return dest; 1.1016 + 1.1017 + badsyntax: 1.1018 + PR_Free(dest); 1.1019 + return nullptr; 1.1020 +} 1.1021 + 1.1022 +// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842)) 1.1023 +// or has ESC which may be an indication that it's in one of many ISO 1.1024 +// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554). 1.1025 +// static 1.1026 +bool Is7bitNonAsciiString(const char *input, uint32_t len) 1.1027 +{ 1.1028 + int32_t c; 1.1029 + 1.1030 + enum { hz_initial, // No HZ seen yet 1.1031 + hz_escaped, // Inside an HZ ~{ escape sequence 1.1032 + hz_seen, // Have seen at least one complete HZ sequence 1.1033 + hz_notpresent // Have seen something that is not legal HZ 1.1034 + } hz_state; 1.1035 + 1.1036 + hz_state = hz_initial; 1.1037 + while (len) { 1.1038 + c = uint8_t(*input++); 1.1039 + len--; 1.1040 + if (c & 0x80) return false; 1.1041 + if (c == 0x1B) return true; 1.1042 + if (c == '~') { 1.1043 + switch (hz_state) { 1.1044 + case hz_initial: 1.1045 + case hz_seen: 1.1046 + if (*input == '{') { 1.1047 + hz_state = hz_escaped; 1.1048 + } else if (*input == '~') { 1.1049 + // ~~ is the HZ encoding of ~. Skip over second ~ as well 1.1050 + hz_state = hz_seen; 1.1051 + input++; 1.1052 + len--; 1.1053 + } else { 1.1054 + hz_state = hz_notpresent; 1.1055 + } 1.1056 + break; 1.1057 + 1.1058 + case hz_escaped: 1.1059 + if (*input == '}') hz_state = hz_seen; 1.1060 + break; 1.1061 + default: 1.1062 + break; 1.1063 + } 1.1064 + } 1.1065 + } 1.1066 + return hz_state == hz_seen; 1.1067 +} 1.1068 + 1.1069 +#define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD) 1.1070 + 1.1071 +// copy 'raw' sequences of octets in aInput to aOutput. 1.1072 +// If aDefaultCharset is specified, the input is assumed to be in the 1.1073 +// charset and converted to UTF-8. Otherwise, a blind copy is made. 1.1074 +// If aDefaultCharset is specified, but the conversion to UTF-8 1.1075 +// is not successful, each octet is replaced by Unicode replacement 1.1076 +// chars. *aOutput is advanced by the number of output octets. 1.1077 +// static 1.1078 +void CopyRawHeader(const char *aInput, uint32_t aLen, 1.1079 + const char *aDefaultCharset, nsACString &aOutput) 1.1080 +{ 1.1081 + int32_t c; 1.1082 + 1.1083 + // If aDefaultCharset is not specified, make a blind copy. 1.1084 + if (!aDefaultCharset || !*aDefaultCharset) { 1.1085 + aOutput.Append(aInput, aLen); 1.1086 + return; 1.1087 + } 1.1088 + 1.1089 + // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022 1.1090 + // A ~ may indicate it is HZ 1.1091 + while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) { 1.1092 + aOutput.Append(char(c)); 1.1093 + aLen--; 1.1094 + } 1.1095 + if (!aLen) { 1.1096 + return; 1.1097 + } 1.1098 + aInput--; 1.1099 + 1.1100 + // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii 1.1101 + // string and aDefaultCharset is a 7bit non-ascii charset. 1.1102 + bool skipCheck = (c == 0x1B || c == '~') && 1.1103 + IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset); 1.1104 + 1.1105 + // If not UTF-8, treat as default charset 1.1106 + nsCOMPtr<nsIUTF8ConverterService> 1.1107 + cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); 1.1108 + nsAutoCString utf8Text; 1.1109 + if (cvtUTF8 && 1.1110 + NS_SUCCEEDED( 1.1111 + cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), 1.1112 + aDefaultCharset, skipCheck, true, 1, 1.1113 + utf8Text))) { 1.1114 + aOutput.Append(utf8Text); 1.1115 + } else { // replace each octet with Unicode replacement char in UTF-8. 1.1116 + for (uint32_t i = 0; i < aLen; i++) { 1.1117 + c = uint8_t(*aInput++); 1.1118 + if (c & 0x80) 1.1119 + aOutput.Append(REPLACEMENT_CHAR); 1.1120 + else 1.1121 + aOutput.Append(char(c)); 1.1122 + } 1.1123 + } 1.1124 +} 1.1125 + 1.1126 +nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64, 1.1127 + const char *aCharset, nsACString &aResult) 1.1128 +{ 1.1129 + char *decodedText; 1.1130 + NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'"); 1.1131 + if(aQOrBase64 == 'Q') 1.1132 + decodedText = DecodeQ(aEncoded, aLen); 1.1133 + else if (aQOrBase64 == 'B') { 1.1134 + decodedText = PL_Base64Decode(aEncoded, aLen, nullptr); 1.1135 + } else { 1.1136 + return NS_ERROR_INVALID_ARG; 1.1137 + } 1.1138 + 1.1139 + if (!decodedText) { 1.1140 + return NS_ERROR_INVALID_ARG; 1.1141 + } 1.1142 + 1.1143 + nsresult rv; 1.1144 + nsCOMPtr<nsIUTF8ConverterService> 1.1145 + cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv)); 1.1146 + nsAutoCString utf8Text; 1.1147 + if (NS_SUCCEEDED(rv)) { 1.1148 + // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset. 1.1149 + rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText), 1.1150 + aCharset, 1.1151 + IS_7BIT_NON_ASCII_CHARSET(aCharset), 1.1152 + true, 1, utf8Text); 1.1153 + } 1.1154 + PR_Free(decodedText); 1.1155 + if (NS_FAILED(rv)) { 1.1156 + return rv; 1.1157 + } 1.1158 + aResult.Append(utf8Text); 1.1159 + 1.1160 + return NS_OK; 1.1161 +} 1.1162 + 1.1163 +static const char especials[] = "()<>@,;:\\\"/[]?.="; 1.1164 + 1.1165 +// |decode_mime_part2_str| taken from comi18n.c 1.1166 +// Decode RFC2047-encoded words in the input and convert the result to UTF-8. 1.1167 +// If aOverrideCharset is true, charset in RFC2047-encoded words is 1.1168 +// ignored and aDefaultCharset is assumed, instead. aDefaultCharset 1.1169 +// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8. 1.1170 +//static 1.1171 +nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, 1.1172 + bool aOverrideCharset, nsACString &aResult) 1.1173 +{ 1.1174 + const char *p, *q = nullptr, *r; 1.1175 + const char *begin; // tracking pointer for where we are in the input buffer 1.1176 + int32_t isLastEncodedWord = 0; 1.1177 + const char *charsetStart, *charsetEnd; 1.1178 + nsAutoCString prevCharset, curCharset; 1.1179 + nsAutoCString encodedText; 1.1180 + char prevEncoding = '\0', curEncoding; 1.1181 + nsresult rv; 1.1182 + 1.1183 + begin = aHeader; 1.1184 + 1.1185 + // To avoid buffer realloc, if possible, set capacity in advance. No 1.1186 + // matter what, more than 3x expansion can never happen for all charsets 1.1187 + // supported by Mozilla. SCSU/BCSU with the sliding window set to a 1.1188 + // non-BMP block may be exceptions, but Mozilla does not support them. 1.1189 + // Neither any known mail/news program use them. Even if there's, we're 1.1190 + // safe because we don't use a raw *char any more. 1.1191 + aResult.SetCapacity(3 * strlen(aHeader)); 1.1192 + 1.1193 + while ((p = PL_strstr(begin, "=?")) != 0) { 1.1194 + if (isLastEncodedWord) { 1.1195 + // See if it's all whitespace. 1.1196 + for (q = begin; q < p; ++q) { 1.1197 + if (!PL_strchr(" \t\r\n", *q)) break; 1.1198 + } 1.1199 + } 1.1200 + 1.1201 + if (!isLastEncodedWord || q < p) { 1.1202 + if (!encodedText.IsEmpty()) { 1.1203 + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), 1.1204 + prevEncoding, prevCharset.get(), aResult); 1.1205 + if (NS_FAILED(rv)) { 1.1206 + aResult.Append(encodedText); 1.1207 + } 1.1208 + encodedText.Truncate(); 1.1209 + prevCharset.Truncate(); 1.1210 + prevEncoding = '\0'; 1.1211 + } 1.1212 + // copy the part before the encoded-word 1.1213 + CopyRawHeader(begin, p - begin, aDefaultCharset, aResult); 1.1214 + begin = p; 1.1215 + } 1.1216 + 1.1217 + p += 2; 1.1218 + 1.1219 + // Get charset info 1.1220 + charsetStart = p; 1.1221 + charsetEnd = 0; 1.1222 + for (q = p; *q != '?'; q++) { 1.1223 + if (*q <= ' ' || PL_strchr(especials, *q)) { 1.1224 + goto badsyntax; 1.1225 + } 1.1226 + 1.1227 + // RFC 2231 section 5 1.1228 + if (!charsetEnd && *q == '*') { 1.1229 + charsetEnd = q; 1.1230 + } 1.1231 + } 1.1232 + if (!charsetEnd) { 1.1233 + charsetEnd = q; 1.1234 + } 1.1235 + 1.1236 + q++; 1.1237 + curEncoding = nsCRT::ToUpper(*q); 1.1238 + if (curEncoding != 'Q' && curEncoding != 'B') 1.1239 + goto badsyntax; 1.1240 + 1.1241 + if (q[1] != '?') 1.1242 + goto badsyntax; 1.1243 + 1.1244 + r = q; 1.1245 + for (r = q + 2; *r != '?'; r++) { 1.1246 + if (*r < ' ') goto badsyntax; 1.1247 + } 1.1248 + if (r[1] != '=') 1.1249 + goto badsyntax; 1.1250 + else if (r == q + 2) { 1.1251 + // it's empty, skip 1.1252 + begin = r + 2; 1.1253 + isLastEncodedWord = 1; 1.1254 + continue; 1.1255 + } 1.1256 + 1.1257 + curCharset.Assign(charsetStart, charsetEnd - charsetStart); 1.1258 + // Override charset if requested. Never override labeled UTF-8. 1.1259 + // Use default charset instead of UNKNOWN-8BIT 1.1260 + if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) 1.1261 + || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT")) 1.1262 + ) { 1.1263 + curCharset = aDefaultCharset; 1.1264 + } 1.1265 + 1.1266 + const char *R; 1.1267 + R = r; 1.1268 + if (curEncoding == 'B') { 1.1269 + // bug 227290. ignore an extraneous '=' at the end. 1.1270 + // (# of characters in B-encoded part has to be a multiple of 4) 1.1271 + int32_t n = r - (q + 2); 1.1272 + R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0; 1.1273 + } 1.1274 + // Bug 493544. Don't decode the encoded text until it ends 1.1275 + if (R[-1] != '=' 1.1276 + && (prevCharset.IsEmpty() 1.1277 + || (curCharset == prevCharset && curEncoding == prevEncoding)) 1.1278 + ) { 1.1279 + encodedText.Append(q + 2, R - (q + 2)); 1.1280 + prevCharset = curCharset; 1.1281 + prevEncoding = curEncoding; 1.1282 + 1.1283 + begin = r + 2; 1.1284 + isLastEncodedWord = 1; 1.1285 + continue; 1.1286 + } 1.1287 + 1.1288 + bool bDecoded; // If the current line has been decoded. 1.1289 + bDecoded = false; 1.1290 + if (!encodedText.IsEmpty()) { 1.1291 + if (curCharset == prevCharset && curEncoding == prevEncoding) { 1.1292 + encodedText.Append(q + 2, R - (q + 2)); 1.1293 + bDecoded = true; 1.1294 + } 1.1295 + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), 1.1296 + prevEncoding, prevCharset.get(), aResult); 1.1297 + if (NS_FAILED(rv)) { 1.1298 + aResult.Append(encodedText); 1.1299 + } 1.1300 + encodedText.Truncate(); 1.1301 + prevCharset.Truncate(); 1.1302 + prevEncoding = '\0'; 1.1303 + } 1.1304 + if (!bDecoded) { 1.1305 + rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, 1.1306 + curCharset.get(), aResult); 1.1307 + if (NS_FAILED(rv)) { 1.1308 + aResult.Append(encodedText); 1.1309 + } 1.1310 + } 1.1311 + 1.1312 + begin = r + 2; 1.1313 + isLastEncodedWord = 1; 1.1314 + continue; 1.1315 + 1.1316 + badsyntax: 1.1317 + if (!encodedText.IsEmpty()) { 1.1318 + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), 1.1319 + prevEncoding, prevCharset.get(), aResult); 1.1320 + if (NS_FAILED(rv)) { 1.1321 + aResult.Append(encodedText); 1.1322 + } 1.1323 + encodedText.Truncate(); 1.1324 + prevCharset.Truncate(); 1.1325 + } 1.1326 + // copy the part before the encoded-word 1.1327 + aResult.Append(begin, p - begin); 1.1328 + begin = p; 1.1329 + isLastEncodedWord = 0; 1.1330 + } 1.1331 + 1.1332 + if (!encodedText.IsEmpty()) { 1.1333 + rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), 1.1334 + prevEncoding, prevCharset.get(), aResult); 1.1335 + if (NS_FAILED(rv)) { 1.1336 + aResult.Append(encodedText); 1.1337 + } 1.1338 + } 1.1339 + 1.1340 + // put the tail back 1.1341 + CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult); 1.1342 + 1.1343 + nsAutoCString tempStr(aResult); 1.1344 + tempStr.ReplaceChar('\t', ' '); 1.1345 + aResult = tempStr; 1.1346 + 1.1347 + return NS_OK; 1.1348 +}