michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* vim: set sw=4 ts=8 et tw=80 : */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include michael@0: #include "prmem.h" michael@0: #include "prprf.h" michael@0: #include "plstr.h" michael@0: #include "plbase64.h" michael@0: #include "nsCRT.h" michael@0: #include "nsMemory.h" michael@0: #include "nsTArray.h" michael@0: #include "nsCOMPtr.h" michael@0: #include "nsEscape.h" michael@0: #include "nsIUTF8ConverterService.h" michael@0: #include "nsUConvCID.h" michael@0: #include "nsIServiceManager.h" michael@0: #include "nsMIMEHeaderParamImpl.h" michael@0: #include "nsReadableUtils.h" michael@0: #include "nsNativeCharsetUtils.h" michael@0: #include "nsError.h" michael@0: #include "nsIUnicodeDecoder.h" michael@0: #include "mozilla/dom/EncodingUtils.h" michael@0: michael@0: using mozilla::dom::EncodingUtils; michael@0: michael@0: // static functions declared below are moved from mailnews/mime/src/comi18n.cpp michael@0: michael@0: static char *DecodeQ(const char *, uint32_t); michael@0: static bool Is7bitNonAsciiString(const char *, uint32_t); michael@0: static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &); michael@0: static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&); michael@0: static nsresult internalDecodeParameter(const nsACString&, const char*, michael@0: const char*, bool, bool, nsACString&); michael@0: michael@0: // XXX The chance of UTF-7 being used in the message header is really michael@0: // low, but in theory it's possible. michael@0: #define IS_7BIT_NON_ASCII_CHARSET(cset) \ michael@0: (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \ michael@0: !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \ michael@0: !nsCRT::strncasecmp((cset), "UTF-7", 5)) michael@0: michael@0: NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam) michael@0: michael@0: NS_IMETHODIMP michael@0: nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, michael@0: const char *aParamName, michael@0: const nsACString& aFallbackCharset, michael@0: bool aTryLocaleCharset, michael@0: char **aLang, nsAString& aResult) michael@0: { michael@0: return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING, michael@0: aFallbackCharset, aTryLocaleCharset, aLang, aResult); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, michael@0: const char *aParamName, michael@0: const nsACString& aFallbackCharset, michael@0: bool aTryLocaleCharset, michael@0: char **aLang, nsAString& aResult) michael@0: { michael@0: return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING, michael@0: aFallbackCharset, aTryLocaleCharset, aLang, aResult); michael@0: } michael@0: michael@0: // XXX : aTryLocaleCharset is not yet effective. michael@0: nsresult michael@0: nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, michael@0: const char *aParamName, michael@0: ParamDecoding aDecoding, michael@0: const nsACString& aFallbackCharset, michael@0: bool aTryLocaleCharset, michael@0: char **aLang, nsAString& aResult) michael@0: { michael@0: aResult.Truncate(); michael@0: nsresult rv; michael@0: michael@0: // get parameter (decode RFC 2231/5987 when applicable, as specified by michael@0: // aDecoding (5987 being a subset of 2231) and return charset.) michael@0: nsXPIDLCString med; michael@0: nsXPIDLCString charset; michael@0: rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, michael@0: aDecoding, getter_Copies(charset), aLang, michael@0: getter_Copies(med)); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: michael@0: // convert to UTF-8 after charset conversion and RFC 2047 decoding michael@0: // if necessary. michael@0: michael@0: nsAutoCString str1; michael@0: rv = internalDecodeParameter(med, charset.get(), nullptr, false, michael@0: // was aDecoding == MIME_FIELD_ENCODING michael@0: // see bug 875615 michael@0: true, michael@0: str1); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: if (!aFallbackCharset.IsEmpty()) michael@0: { michael@0: nsAutoCString charset; michael@0: EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset); michael@0: nsAutoCString str2; michael@0: nsCOMPtr michael@0: cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); michael@0: if (cvtUTF8 && michael@0: NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, michael@0: PromiseFlatCString(aFallbackCharset).get(), false, michael@0: !charset.EqualsLiteral("UTF-8"), michael@0: 1, str2))) { michael@0: CopyUTF8toUTF16(str2, aResult); michael@0: return NS_OK; michael@0: } michael@0: } michael@0: michael@0: if (IsUTF8(str1)) { michael@0: CopyUTF8toUTF16(str1, aResult); michael@0: return NS_OK; michael@0: } michael@0: michael@0: if (aTryLocaleCharset && !NS_IsNativeUTF8()) michael@0: return NS_CopyNativeToUnicode(str1, aResult); michael@0: michael@0: CopyASCIItoUTF16(str1, aResult); michael@0: return NS_OK; michael@0: } michael@0: michael@0: // remove backslash-encoded sequences from quoted-strings michael@0: // modifies string in place, potentially shortening it michael@0: void RemoveQuotedStringEscapes(char *src) michael@0: { michael@0: char *dst = src; michael@0: michael@0: for (char *c = src; *c; ++c) michael@0: { michael@0: if (c[0] == '\\' && c[1]) michael@0: { michael@0: // skip backslash if not at end michael@0: ++c; michael@0: } michael@0: *dst++ = *c; michael@0: } michael@0: *dst = 0; michael@0: } michael@0: michael@0: // true is character is a hex digit michael@0: bool IsHexDigit(char aChar) michael@0: { michael@0: char c = aChar; michael@0: michael@0: return (c >= 'a' && c <= 'f') || michael@0: (c >= 'A' && c <= 'F') || michael@0: (c >= '0' && c <= '9'); michael@0: } michael@0: michael@0: // validate that a C String containing %-escapes is syntactically valid michael@0: bool IsValidPercentEscaped(const char *aValue, int32_t len) michael@0: { michael@0: for (int32_t i = 0; i < len; i++) { michael@0: if (aValue[i] == '%') { michael@0: if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) { michael@0: return false; michael@0: } michael@0: } michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: // Support for continuations (RFC 2231, Section 3) michael@0: michael@0: // only a sane number supported michael@0: #define MAX_CONTINUATIONS 999 michael@0: michael@0: // part of a continuation michael@0: michael@0: class Continuation { michael@0: public: michael@0: Continuation(const char *aValue, uint32_t aLength, michael@0: bool aNeedsPercentDecoding, bool aWasQuotedString) { michael@0: value = aValue; michael@0: length = aLength; michael@0: needsPercentDecoding = aNeedsPercentDecoding; michael@0: wasQuotedString = aWasQuotedString; michael@0: } michael@0: Continuation() { michael@0: // empty constructor needed for nsTArray michael@0: value = 0L; michael@0: length = 0; michael@0: needsPercentDecoding = false; michael@0: wasQuotedString = false; michael@0: } michael@0: ~Continuation() {} michael@0: michael@0: const char *value; michael@0: uint32_t length; michael@0: bool needsPercentDecoding; michael@0: bool wasQuotedString; michael@0: }; michael@0: michael@0: // combine segments into a single string, returning the allocated string michael@0: // (or nullptr) while emptying the list michael@0: char *combineContinuations(nsTArray& aArray) michael@0: { michael@0: // Sanity check michael@0: if (aArray.Length() == 0) michael@0: return nullptr; michael@0: michael@0: // Get an upper bound for the length michael@0: uint32_t length = 0; michael@0: for (uint32_t i = 0; i < aArray.Length(); i++) { michael@0: length += aArray[i].length; michael@0: } michael@0: michael@0: // Allocate michael@0: char *result = (char *) nsMemory::Alloc(length + 1); michael@0: michael@0: // Concatenate michael@0: if (result) { michael@0: *result = '\0'; michael@0: michael@0: for (uint32_t i = 0; i < aArray.Length(); i++) { michael@0: Continuation cont = aArray[i]; michael@0: if (! cont.value) break; michael@0: michael@0: char *c = result + strlen(result); michael@0: strncat(result, cont.value, cont.length); michael@0: if (cont.needsPercentDecoding) { michael@0: nsUnescape(c); michael@0: } michael@0: if (cont.wasQuotedString) { michael@0: RemoveQuotedStringEscapes(c); michael@0: } michael@0: } michael@0: michael@0: // return null if empty value michael@0: if (*result == '\0') { michael@0: nsMemory::Free(result); michael@0: result = nullptr; michael@0: } michael@0: } else { michael@0: // Handle OOM michael@0: NS_WARNING("Out of memory\n"); michael@0: } michael@0: michael@0: return result; michael@0: } michael@0: michael@0: // add a continuation, return false on error if segment already has been seen michael@0: bool addContinuation(nsTArray& aArray, uint32_t aIndex, michael@0: const char *aValue, uint32_t aLength, michael@0: bool aNeedsPercentDecoding, bool aWasQuotedString) michael@0: { michael@0: if (aIndex < aArray.Length() && aArray[aIndex].value) { michael@0: NS_WARNING("duplicate RC2231 continuation segment #\n"); michael@0: return false; michael@0: } michael@0: michael@0: if (aIndex > MAX_CONTINUATIONS) { michael@0: NS_WARNING("RC2231 continuation segment # exceeds limit\n"); michael@0: return false; michael@0: } michael@0: michael@0: if (aNeedsPercentDecoding && aWasQuotedString) { michael@0: NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n"); michael@0: return false; michael@0: } michael@0: michael@0: Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString); michael@0: michael@0: if (aArray.Length() <= aIndex) { michael@0: aArray.SetLength(aIndex + 1); michael@0: } michael@0: aArray[aIndex] = cont; michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // parse a segment number; return -1 on error michael@0: int32_t parseSegmentNumber(const char *aValue, int32_t aLen) michael@0: { michael@0: if (aLen < 1) { michael@0: NS_WARNING("segment number missing\n"); michael@0: return -1; michael@0: } michael@0: michael@0: if (aLen > 1 && aValue[0] == '0') { michael@0: NS_WARNING("leading '0' not allowed in segment number\n"); michael@0: return -1; michael@0: } michael@0: michael@0: int32_t segmentNumber = 0; michael@0: michael@0: for (int32_t i = 0; i < aLen; i++) { michael@0: if (! (aValue[i] >= '0' && aValue[i] <= '9')) { michael@0: NS_WARNING("invalid characters in segment number\n"); michael@0: return -1; michael@0: } michael@0: michael@0: segmentNumber *= 10; michael@0: segmentNumber += aValue[i] - '0'; michael@0: if (segmentNumber > MAX_CONTINUATIONS) { michael@0: NS_WARNING("Segment number exceeds sane size\n"); michael@0: return -1; michael@0: } michael@0: } michael@0: michael@0: return segmentNumber; michael@0: } michael@0: michael@0: // validate a given octet sequence for compliance with the specified michael@0: // encoding michael@0: bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets) michael@0: { michael@0: nsCOMPtr cvtUTF8(do_GetService michael@0: (NS_UTF8CONVERTERSERVICE_CONTRACTID)); michael@0: if (!cvtUTF8) { michael@0: NS_WARNING("Can't get UTF8ConverterService\n"); michael@0: return false; michael@0: } michael@0: michael@0: nsAutoCString tmpRaw; michael@0: tmpRaw.Assign(aOctets); michael@0: nsAutoCString tmpDecoded; michael@0: michael@0: nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw, michael@0: PromiseFlatCString(aCharset).get(), michael@0: false, false, 1, tmpDecoded); michael@0: michael@0: if (rv != NS_OK) { michael@0: // we can't decode; charset may be unsupported, or the octet sequence michael@0: // is broken (illegal or incomplete octet sequence contained) michael@0: NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n"); michael@0: return false; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // moved almost verbatim from mimehdrs.cpp michael@0: // char * michael@0: // MimeHeaders_get_parameter (const char *header_value, const char *parm_name, michael@0: // char **charset, char **language) michael@0: // michael@0: // The format of these header lines is michael@0: // [ ';' '=' ]* michael@0: NS_IMETHODIMP michael@0: nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, michael@0: const char *aParamName, michael@0: char **aCharset, michael@0: char **aLang, michael@0: char **aResult) michael@0: { michael@0: return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING, michael@0: aCharset, aLang, aResult); michael@0: } michael@0: michael@0: michael@0: nsresult michael@0: nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, michael@0: const char *aParamName, michael@0: ParamDecoding aDecoding, michael@0: char **aCharset, michael@0: char **aLang, michael@0: char **aResult) michael@0: { michael@0: michael@0: if (!aHeaderValue || !*aHeaderValue || !aResult) michael@0: return NS_ERROR_INVALID_ARG; michael@0: michael@0: *aResult = nullptr; michael@0: michael@0: if (aCharset) *aCharset = nullptr; michael@0: if (aLang) *aLang = nullptr; michael@0: michael@0: nsAutoCString charset; michael@0: michael@0: // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable michael@0: // them for HTTP header fields later on, see bug 776324 michael@0: bool acceptContinuations = true; michael@0: michael@0: const char *str = aHeaderValue; michael@0: michael@0: // skip leading white space. michael@0: for (; *str && nsCRT::IsAsciiSpace(*str); ++str) michael@0: ; michael@0: const char *start = str; michael@0: michael@0: // aParamName is empty. return the first (possibly) _unnamed_ 'parameter' michael@0: // For instance, return 'inline' in the following case: michael@0: // Content-Disposition: inline; filename=..... michael@0: if (!aParamName || !*aParamName) michael@0: { michael@0: for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str) michael@0: ; michael@0: if (str == start) michael@0: return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY; michael@0: michael@0: *aResult = (char *) nsMemory::Clone(start, (str - start) + 1); michael@0: NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY); michael@0: (*aResult)[str - start] = '\0'; // null-terminate michael@0: return NS_OK; michael@0: } michael@0: michael@0: /* Skip forward to first ';' */ michael@0: for (; *str && *str != ';' && *str != ','; ++str) michael@0: ; michael@0: if (*str) michael@0: str++; michael@0: /* Skip over following whitespace */ michael@0: for (; *str && nsCRT::IsAsciiSpace(*str); ++str) michael@0: ; michael@0: michael@0: // Some broken http servers just specify parameters michael@0: // like 'filename' without specifying disposition michael@0: // method. Rewind to the first non-white-space michael@0: // character. michael@0: michael@0: if (!*str) michael@0: str = start; michael@0: michael@0: // RFC2231 - The legitimate parm format can be: michael@0: // A. title=ThisIsTitle michael@0: // B. title*=us-ascii'en-us'This%20is%20wierd. michael@0: // C. title*0*=us-ascii'en'This%20is%20wierd.%20We michael@0: // title*1*=have%20to%20support%20this. michael@0: // title*2="Else..." michael@0: // D. title*0="Hey, what you think you are doing?" michael@0: // title*1="There is no charset and lang info." michael@0: // RFC5987: only A and B michael@0: michael@0: // collect results for the different algorithms (plain filename, michael@0: // RFC5987/2231-encoded filename, + continuations) separately and decide michael@0: // which to use at the end michael@0: char *caseAResult = nullptr; michael@0: char *caseBResult = nullptr; michael@0: char *caseCDResult = nullptr; michael@0: michael@0: // collect continuation segments michael@0: nsTArray segments; michael@0: michael@0: michael@0: // our copies of the charset parameter, kept separately as they might michael@0: // differ for the two formats michael@0: nsDependentCSubstring charsetB, charsetCD; michael@0: michael@0: nsDependentCSubstring lang; michael@0: michael@0: int32_t paramLen = strlen(aParamName); michael@0: michael@0: while (*str) { michael@0: // find name/value michael@0: michael@0: const char *nameStart = str; michael@0: const char *nameEnd = nullptr; michael@0: const char *valueStart = str; michael@0: const char *valueEnd = nullptr; michael@0: bool isQuotedString = false; michael@0: michael@0: NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace."); michael@0: michael@0: // Skip forward to the end of this token. michael@0: for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++) michael@0: ; michael@0: nameEnd = str; michael@0: michael@0: int32_t nameLen = nameEnd - nameStart; michael@0: michael@0: // Skip over whitespace, '=', and whitespace michael@0: while (nsCRT::IsAsciiSpace(*str)) ++str; michael@0: if (!*str) { michael@0: break; michael@0: } michael@0: if (*str++ != '=') { michael@0: // don't accept parameters without "=" michael@0: goto increment_str; michael@0: } michael@0: while (nsCRT::IsAsciiSpace(*str)) ++str; michael@0: michael@0: if (*str != '"') { michael@0: // The value is a token, not a quoted string. michael@0: valueStart = str; michael@0: for (valueEnd = str; michael@0: *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';'; michael@0: valueEnd++) michael@0: ; michael@0: str = valueEnd; michael@0: } else { michael@0: isQuotedString = true; michael@0: michael@0: ++str; michael@0: valueStart = str; michael@0: for (valueEnd = str; *valueEnd; ++valueEnd) { michael@0: if (*valueEnd == '\\' && *(valueEnd + 1)) michael@0: ++valueEnd; michael@0: else if (*valueEnd == '"') michael@0: break; michael@0: } michael@0: str = valueEnd; michael@0: // *valueEnd != null means that *valueEnd is quote character. michael@0: if (*valueEnd) michael@0: str++; michael@0: } michael@0: michael@0: // See if this is the simplest case (case A above), michael@0: // a 'single' line value with no charset and lang. michael@0: // If so, copy it and return. michael@0: if (nameLen == paramLen && michael@0: !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) { michael@0: michael@0: if (caseAResult) { michael@0: // we already have one caseA result, ignore subsequent ones michael@0: goto increment_str; michael@0: } michael@0: michael@0: // if the parameter spans across multiple lines we have to strip out the michael@0: // line continuation -- jht 4/29/98 michael@0: nsAutoCString tempStr(valueStart, valueEnd - valueStart); michael@0: tempStr.StripChars("\r\n"); michael@0: char *res = ToNewCString(tempStr); michael@0: NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY); michael@0: michael@0: if (isQuotedString) michael@0: RemoveQuotedStringEscapes(res); michael@0: michael@0: caseAResult = res; michael@0: // keep going, we may find a RFC 2231/5987 encoded alternative michael@0: } michael@0: // case B, C, and D michael@0: else if (nameLen > paramLen && michael@0: !nsCRT::strncasecmp(nameStart, aParamName, paramLen) && michael@0: *(nameStart + paramLen) == '*') { michael@0: michael@0: // 1st char past '*' michael@0: const char *cp = nameStart + paramLen + 1; michael@0: michael@0: // if param name ends in "*" we need do to RFC5987 "ext-value" decoding michael@0: bool needExtDecoding = *(nameEnd - 1) == '*'; michael@0: michael@0: bool caseB = nameLen == paramLen + 1; michael@0: bool caseCStart = (*cp == '0') && needExtDecoding; michael@0: michael@0: // parse the segment number michael@0: int32_t segmentNumber = -1; michael@0: if (!caseB) { michael@0: int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0); michael@0: segmentNumber = parseSegmentNumber(cp, segLen); michael@0: michael@0: if (segmentNumber == -1) { michael@0: acceptContinuations = false; michael@0: goto increment_str; michael@0: } michael@0: } michael@0: michael@0: // CaseB and start of CaseC: requires charset and optional language michael@0: // in quotes (quotes required even if lang is blank) michael@0: if (caseB || (caseCStart && acceptContinuations)) { michael@0: // look for single quotation mark(') michael@0: const char *sQuote1 = PL_strchr(valueStart, 0x27); michael@0: const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr; michael@0: michael@0: // Two single quotation marks must be present even in michael@0: // absence of charset and lang. michael@0: if (!sQuote1 || !sQuote2) { michael@0: NS_WARNING("Mandatory two single quotes are missing in header parameter\n"); michael@0: } michael@0: michael@0: const char *charsetStart = nullptr; michael@0: int32_t charsetLength = 0; michael@0: const char *langStart = nullptr; michael@0: int32_t langLength = 0; michael@0: const char *rawValStart = nullptr; michael@0: int32_t rawValLength = 0; michael@0: michael@0: if (sQuote2 && sQuote1) { michael@0: // both delimiters present: charSet'lang'rawVal michael@0: rawValStart = sQuote2 + 1; michael@0: rawValLength = valueEnd - rawValStart; michael@0: michael@0: langStart = sQuote1 + 1; michael@0: langLength = sQuote2 - langStart; michael@0: michael@0: charsetStart = valueStart; michael@0: charsetLength = sQuote1 - charsetStart; michael@0: } michael@0: else if (sQuote1) { michael@0: // one delimiter; assume charset'rawVal michael@0: rawValStart = sQuote1 + 1; michael@0: rawValLength = valueEnd - rawValStart; michael@0: michael@0: charsetStart = valueStart; michael@0: charsetLength = sQuote1 - valueStart; michael@0: } michael@0: else { michael@0: // no delimiter: just rawVal michael@0: rawValStart = valueStart; michael@0: rawValLength = valueEnd - valueStart; michael@0: } michael@0: michael@0: if (langLength != 0) { michael@0: lang.Assign(langStart, langLength); michael@0: } michael@0: michael@0: // keep the charset for later michael@0: if (caseB) { michael@0: charsetB.Assign(charsetStart, charsetLength); michael@0: } else { michael@0: // if caseCorD michael@0: charsetCD.Assign(charsetStart, charsetLength); michael@0: } michael@0: michael@0: // non-empty value part michael@0: if (rawValLength > 0) { michael@0: if (!caseBResult && caseB) { michael@0: if (!IsValidPercentEscaped(rawValStart, rawValLength)) { michael@0: goto increment_str; michael@0: } michael@0: michael@0: // allocate buffer for the raw value michael@0: char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1); michael@0: if (!tmpResult) { michael@0: goto increment_str; michael@0: } michael@0: *(tmpResult + rawValLength) = 0; michael@0: michael@0: nsUnescape(tmpResult); michael@0: caseBResult = tmpResult; michael@0: } else { michael@0: // caseC michael@0: bool added = addContinuation(segments, 0, rawValStart, michael@0: rawValLength, needExtDecoding, michael@0: isQuotedString); michael@0: michael@0: if (!added) { michael@0: // continuation not added, stop processing them michael@0: acceptContinuations = false; michael@0: } michael@0: } michael@0: } michael@0: } // end of if-block : title*0*= or title*= michael@0: // caseD: a line of multiline param with no need for unescaping : title*[0-9]= michael@0: // or 2nd or later lines of a caseC param : title*[1-9]*= michael@0: else if (acceptContinuations && segmentNumber != -1) { michael@0: uint32_t valueLength = valueEnd - valueStart; michael@0: michael@0: bool added = addContinuation(segments, segmentNumber, valueStart, michael@0: valueLength, needExtDecoding, michael@0: isQuotedString); michael@0: michael@0: if (!added) { michael@0: // continuation not added, stop processing them michael@0: acceptContinuations = false; michael@0: } michael@0: } // end of if-block : title*[0-9]= or title*[1-9]*= michael@0: } michael@0: michael@0: // str now points after the end of the value. michael@0: // skip over whitespace, ';', whitespace. michael@0: increment_str: michael@0: while (nsCRT::IsAsciiSpace(*str)) ++str; michael@0: if (*str == ';') { michael@0: ++str; michael@0: } else { michael@0: // stop processing the header field; either we are done or the michael@0: // separator was missing michael@0: break; michael@0: } michael@0: while (nsCRT::IsAsciiSpace(*str)) ++str; michael@0: } michael@0: michael@0: caseCDResult = combineContinuations(segments); michael@0: michael@0: if (caseBResult && !charsetB.IsEmpty()) { michael@0: // check that the 2231/5987 result decodes properly given the michael@0: // specified character set michael@0: if (!IsValidOctetSequenceForCharset(charsetB, caseBResult)) michael@0: caseBResult = nullptr; michael@0: } michael@0: michael@0: if (caseCDResult && !charsetCD.IsEmpty()) { michael@0: // check that the 2231/5987 result decodes properly given the michael@0: // specified character set michael@0: if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult)) michael@0: caseCDResult = nullptr; michael@0: } michael@0: michael@0: if (caseBResult) { michael@0: // prefer simple 5987 format over 2231 with continuations michael@0: *aResult = caseBResult; michael@0: caseBResult = nullptr; michael@0: charset.Assign(charsetB); michael@0: } michael@0: else if (caseCDResult) { michael@0: // prefer 2231/5987 with or without continuations over plain format michael@0: *aResult = caseCDResult; michael@0: caseCDResult = nullptr; michael@0: charset.Assign(charsetCD); michael@0: } michael@0: else if (caseAResult) { michael@0: *aResult = caseAResult; michael@0: caseAResult = nullptr; michael@0: } michael@0: michael@0: // free unused stuff michael@0: nsMemory::Free(caseAResult); michael@0: nsMemory::Free(caseBResult); michael@0: nsMemory::Free(caseCDResult); michael@0: michael@0: // if we have a result michael@0: if (*aResult) { michael@0: // then return charset and lang as well michael@0: if (aLang && !lang.IsEmpty()) { michael@0: uint32_t len = lang.Length(); michael@0: *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1); michael@0: if (*aLang) { michael@0: *(*aLang + len) = 0; michael@0: } michael@0: } michael@0: if (aCharset && !charset.IsEmpty()) { michael@0: uint32_t len = charset.Length(); michael@0: *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1); michael@0: if (*aCharset) { michael@0: *(*aCharset + len) = 0; michael@0: } michael@0: } michael@0: } michael@0: michael@0: return *aResult ? NS_OK : NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: nsresult michael@0: internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset, michael@0: bool aOverrideCharset, bool aEatContinuations, michael@0: nsACString& aResult) michael@0: { michael@0: aResult.Truncate(); michael@0: if (!aHeaderVal) michael@0: return NS_ERROR_INVALID_ARG; michael@0: if (!*aHeaderVal) michael@0: return NS_OK; michael@0: michael@0: michael@0: // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but michael@0: // aDefaultCharset is specified, decodes RFC 2047 encoding and converts michael@0: // to UTF-8. Otherwise, just strips away CRLF. michael@0: if (PL_strstr(aHeaderVal, "=?") || michael@0: (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || michael@0: Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) { michael@0: DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult); michael@0: } else if (aEatContinuations && michael@0: (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) { michael@0: aResult = aHeaderVal; michael@0: } else { michael@0: aEatContinuations = false; michael@0: aResult = aHeaderVal; michael@0: } michael@0: michael@0: if (aEatContinuations) { michael@0: nsAutoCString temp(aResult); michael@0: temp.ReplaceSubstring("\n\t", " "); michael@0: temp.ReplaceSubstring("\r\t", " "); michael@0: temp.StripChars("\r\n"); michael@0: aResult = temp; michael@0: } michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, michael@0: const char* aDefaultCharset, michael@0: bool aOverrideCharset, michael@0: bool aEatContinuations, michael@0: nsACString& aResult) michael@0: { michael@0: return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset, michael@0: aOverrideCharset, aEatContinuations, michael@0: aResult); michael@0: } michael@0: michael@0: // true if the character is allowed in a RFC 5987 value michael@0: // see RFC 5987, Section 3.2.1, "attr-char" michael@0: bool IsRFC5987AttrChar(char aChar) michael@0: { michael@0: char c = aChar; michael@0: michael@0: return (c >= 'a' && c <= 'z') || michael@0: (c >= 'A' && c <= 'Z') || michael@0: (c >= '0' && c <= '9') || michael@0: (c == '!' || c == '#' || c == '$' || c == '&' || michael@0: c == '+' || c == '-' || c == '.' || c == '^' || michael@0: c == '_' || c == '`' || c == '|' || c == '~'); michael@0: } michael@0: michael@0: // percent-decode a value michael@0: // returns false on failure michael@0: bool PercentDecode(nsACString& aValue) michael@0: { michael@0: char *c = (char *) nsMemory::Alloc(aValue.Length() + 1); michael@0: if (!c) { michael@0: return false; michael@0: } michael@0: michael@0: strcpy(c, PromiseFlatCString(aValue).get()); michael@0: nsUnescape(c); michael@0: aValue.Assign(c); michael@0: nsMemory::Free(c); michael@0: michael@0: return true; michael@0: } michael@0: michael@0: // Decode a parameter value using the encoding defined in RFC 5987 michael@0: // michael@0: // charset "'" [ language ] "'" value-chars michael@0: NS_IMETHODIMP michael@0: nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal, michael@0: nsACString& aLang, michael@0: nsAString& aResult) michael@0: { michael@0: nsAutoCString charset; michael@0: nsAutoCString language; michael@0: nsAutoCString value; michael@0: michael@0: uint32_t delimiters = 0; michael@0: const char *encoded = PromiseFlatCString(aParamVal).get(); michael@0: const char *c = encoded; michael@0: michael@0: while (*c) { michael@0: char tc = *c++; michael@0: michael@0: if (tc == '\'') { michael@0: // single quote michael@0: delimiters++; michael@0: } else if (((unsigned char)tc) >= 128) { michael@0: // fail early, not ASCII michael@0: NS_WARNING("non-US-ASCII character in RFC5987-encoded param"); michael@0: return NS_ERROR_INVALID_ARG; michael@0: } else { michael@0: if (delimiters == 0) { michael@0: // valid characters are checked later implicitly michael@0: charset.Append(tc); michael@0: } else if (delimiters == 1) { michael@0: // no value checking for now michael@0: language.Append(tc); michael@0: } else if (delimiters == 2) { michael@0: if (IsRFC5987AttrChar(tc)) { michael@0: value.Append(tc); michael@0: } else if (tc == '%') { michael@0: if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) { michael@0: // we expect two more characters michael@0: NS_WARNING("broken %-escape in RFC5987-encoded param"); michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: value.Append(tc); michael@0: // we consume two more michael@0: value.Append(*c++); michael@0: value.Append(*c++); michael@0: } else { michael@0: // character not allowed here michael@0: NS_WARNING("invalid character in RFC5987-encoded param"); michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (delimiters != 2) { michael@0: NS_WARNING("missing delimiters in RFC5987-encoded param"); michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: // abort early for unsupported encodings michael@0: if (!charset.LowerCaseEqualsLiteral("utf-8")) { michael@0: NS_WARNING("unsupported charset in RFC5987-encoded param"); michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: // percent-decode michael@0: if (!PercentDecode(value)) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: michael@0: // return the encoding michael@0: aLang.Assign(language); michael@0: michael@0: // finally convert octet sequence to UTF-8 and be done michael@0: nsresult rv = NS_OK; michael@0: nsCOMPtr cvtUTF8 = michael@0: do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: nsAutoCString utf8; michael@0: rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: CopyUTF8toUTF16(utf8, aResult); michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: internalDecodeParameter(const nsACString& aParamValue, const char* aCharset, michael@0: const char* aDefaultCharset, bool aOverrideCharset, michael@0: bool aDecode2047, nsACString& aResult) michael@0: { michael@0: aResult.Truncate(); michael@0: // If aCharset is given, aParamValue was obtained from RFC2231/5987 michael@0: // encoding and we're pretty sure that it's in aCharset. michael@0: if (aCharset && *aCharset) michael@0: { michael@0: nsCOMPtr cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); michael@0: if (cvtUTF8) michael@0: return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset, michael@0: true, true, 1, aResult); michael@0: } michael@0: michael@0: const nsAFlatCString& param = PromiseFlatCString(aParamValue); michael@0: nsAutoCString unQuoted; michael@0: nsACString::const_iterator s, e; michael@0: param.BeginReading(s); michael@0: param.EndReading(e); michael@0: michael@0: // strip '\' when used to quote CR, LF, '"' and '\' michael@0: for ( ; s != e; ++s) { michael@0: if ((*s == '\\')) { michael@0: if (++s == e) { michael@0: --s; // '\' is at the end. move back and append '\'. michael@0: } michael@0: else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') { michael@0: --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\' michael@0: } michael@0: // else : skip '\' and append the quoted character. michael@0: } michael@0: unQuoted.Append(*s); michael@0: } michael@0: michael@0: aResult = unQuoted; michael@0: nsresult rv = NS_OK; michael@0: michael@0: if (aDecode2047) { michael@0: nsAutoCString decoded; michael@0: michael@0: // Try RFC 2047 encoding, instead. michael@0: rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset, michael@0: aOverrideCharset, true, decoded); michael@0: michael@0: if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) michael@0: aResult = decoded; michael@0: } michael@0: michael@0: return rv; michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue, michael@0: const char* aCharset, michael@0: const char* aDefaultCharset, michael@0: bool aOverrideCharset, michael@0: nsACString& aResult) michael@0: { michael@0: return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset, michael@0: aOverrideCharset, true, aResult); michael@0: } michael@0: michael@0: #define ISHEXCHAR(c) \ michael@0: ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \ michael@0: (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \ michael@0: (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66)) michael@0: michael@0: // Decode Q encoding (RFC 2047). michael@0: // static michael@0: char *DecodeQ(const char *in, uint32_t length) michael@0: { michael@0: char *out, *dest = 0; michael@0: michael@0: out = dest = (char *)PR_Calloc(length + 1, sizeof(char)); michael@0: if (dest == nullptr) michael@0: return nullptr; michael@0: while (length > 0) { michael@0: unsigned c = 0; michael@0: switch (*in) { michael@0: case '=': michael@0: // check if |in| in the form of '=hh' where h is [0-9a-fA-F]. michael@0: if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) michael@0: goto badsyntax; michael@0: PR_sscanf(in + 1, "%2X", &c); michael@0: *out++ = (char) c; michael@0: in += 3; michael@0: length -= 3; michael@0: break; michael@0: michael@0: case '_': michael@0: *out++ = ' '; michael@0: in++; michael@0: length--; michael@0: break; michael@0: michael@0: default: michael@0: if (*in & 0x80) goto badsyntax; michael@0: *out++ = *in++; michael@0: length--; michael@0: } michael@0: } michael@0: *out++ = '\0'; michael@0: michael@0: for (out = dest; *out ; ++out) { michael@0: if (*out == '\t') michael@0: *out = ' '; michael@0: } michael@0: michael@0: return dest; michael@0: michael@0: badsyntax: michael@0: PR_Free(dest); michael@0: return nullptr; michael@0: } michael@0: michael@0: // check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842)) michael@0: // or has ESC which may be an indication that it's in one of many ISO michael@0: // 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554). michael@0: // static michael@0: bool Is7bitNonAsciiString(const char *input, uint32_t len) michael@0: { michael@0: int32_t c; michael@0: michael@0: enum { hz_initial, // No HZ seen yet michael@0: hz_escaped, // Inside an HZ ~{ escape sequence michael@0: hz_seen, // Have seen at least one complete HZ sequence michael@0: hz_notpresent // Have seen something that is not legal HZ michael@0: } hz_state; michael@0: michael@0: hz_state = hz_initial; michael@0: while (len) { michael@0: c = uint8_t(*input++); michael@0: len--; michael@0: if (c & 0x80) return false; michael@0: if (c == 0x1B) return true; michael@0: if (c == '~') { michael@0: switch (hz_state) { michael@0: case hz_initial: michael@0: case hz_seen: michael@0: if (*input == '{') { michael@0: hz_state = hz_escaped; michael@0: } else if (*input == '~') { michael@0: // ~~ is the HZ encoding of ~. Skip over second ~ as well michael@0: hz_state = hz_seen; michael@0: input++; michael@0: len--; michael@0: } else { michael@0: hz_state = hz_notpresent; michael@0: } michael@0: break; michael@0: michael@0: case hz_escaped: michael@0: if (*input == '}') hz_state = hz_seen; michael@0: break; michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: return hz_state == hz_seen; michael@0: } michael@0: michael@0: #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD) michael@0: michael@0: // copy 'raw' sequences of octets in aInput to aOutput. michael@0: // If aDefaultCharset is specified, the input is assumed to be in the michael@0: // charset and converted to UTF-8. Otherwise, a blind copy is made. michael@0: // If aDefaultCharset is specified, but the conversion to UTF-8 michael@0: // is not successful, each octet is replaced by Unicode replacement michael@0: // chars. *aOutput is advanced by the number of output octets. michael@0: // static michael@0: void CopyRawHeader(const char *aInput, uint32_t aLen, michael@0: const char *aDefaultCharset, nsACString &aOutput) michael@0: { michael@0: int32_t c; michael@0: michael@0: // If aDefaultCharset is not specified, make a blind copy. michael@0: if (!aDefaultCharset || !*aDefaultCharset) { michael@0: aOutput.Append(aInput, aLen); michael@0: return; michael@0: } michael@0: michael@0: // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022 michael@0: // A ~ may indicate it is HZ michael@0: while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) { michael@0: aOutput.Append(char(c)); michael@0: aLen--; michael@0: } michael@0: if (!aLen) { michael@0: return; michael@0: } michael@0: aInput--; michael@0: michael@0: // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii michael@0: // string and aDefaultCharset is a 7bit non-ascii charset. michael@0: bool skipCheck = (c == 0x1B || c == '~') && michael@0: IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset); michael@0: michael@0: // If not UTF-8, treat as default charset michael@0: nsCOMPtr michael@0: cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID)); michael@0: nsAutoCString utf8Text; michael@0: if (cvtUTF8 && michael@0: NS_SUCCEEDED( michael@0: cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), michael@0: aDefaultCharset, skipCheck, true, 1, michael@0: utf8Text))) { michael@0: aOutput.Append(utf8Text); michael@0: } else { // replace each octet with Unicode replacement char in UTF-8. michael@0: for (uint32_t i = 0; i < aLen; i++) { michael@0: c = uint8_t(*aInput++); michael@0: if (c & 0x80) michael@0: aOutput.Append(REPLACEMENT_CHAR); michael@0: else michael@0: aOutput.Append(char(c)); michael@0: } michael@0: } michael@0: } michael@0: michael@0: nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64, michael@0: const char *aCharset, nsACString &aResult) michael@0: { michael@0: char *decodedText; michael@0: NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'"); michael@0: if(aQOrBase64 == 'Q') michael@0: decodedText = DecodeQ(aEncoded, aLen); michael@0: else if (aQOrBase64 == 'B') { michael@0: decodedText = PL_Base64Decode(aEncoded, aLen, nullptr); michael@0: } else { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: if (!decodedText) { michael@0: return NS_ERROR_INVALID_ARG; michael@0: } michael@0: michael@0: nsresult rv; michael@0: nsCOMPtr michael@0: cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv)); michael@0: nsAutoCString utf8Text; michael@0: if (NS_SUCCEEDED(rv)) { michael@0: // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset. michael@0: rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText), michael@0: aCharset, michael@0: IS_7BIT_NON_ASCII_CHARSET(aCharset), michael@0: true, 1, utf8Text); michael@0: } michael@0: PR_Free(decodedText); michael@0: if (NS_FAILED(rv)) { michael@0: return rv; michael@0: } michael@0: aResult.Append(utf8Text); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: static const char especials[] = "()<>@,;:\\\"/[]?.="; michael@0: michael@0: // |decode_mime_part2_str| taken from comi18n.c michael@0: // Decode RFC2047-encoded words in the input and convert the result to UTF-8. michael@0: // If aOverrideCharset is true, charset in RFC2047-encoded words is michael@0: // ignored and aDefaultCharset is assumed, instead. aDefaultCharset michael@0: // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8. michael@0: //static michael@0: nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, michael@0: bool aOverrideCharset, nsACString &aResult) michael@0: { michael@0: const char *p, *q = nullptr, *r; michael@0: const char *begin; // tracking pointer for where we are in the input buffer michael@0: int32_t isLastEncodedWord = 0; michael@0: const char *charsetStart, *charsetEnd; michael@0: nsAutoCString prevCharset, curCharset; michael@0: nsAutoCString encodedText; michael@0: char prevEncoding = '\0', curEncoding; michael@0: nsresult rv; michael@0: michael@0: begin = aHeader; michael@0: michael@0: // To avoid buffer realloc, if possible, set capacity in advance. No michael@0: // matter what, more than 3x expansion can never happen for all charsets michael@0: // supported by Mozilla. SCSU/BCSU with the sliding window set to a michael@0: // non-BMP block may be exceptions, but Mozilla does not support them. michael@0: // Neither any known mail/news program use them. Even if there's, we're michael@0: // safe because we don't use a raw *char any more. michael@0: aResult.SetCapacity(3 * strlen(aHeader)); michael@0: michael@0: while ((p = PL_strstr(begin, "=?")) != 0) { michael@0: if (isLastEncodedWord) { michael@0: // See if it's all whitespace. michael@0: for (q = begin; q < p; ++q) { michael@0: if (!PL_strchr(" \t\r\n", *q)) break; michael@0: } michael@0: } michael@0: michael@0: if (!isLastEncodedWord || q < p) { michael@0: if (!encodedText.IsEmpty()) { michael@0: rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), michael@0: prevEncoding, prevCharset.get(), aResult); michael@0: if (NS_FAILED(rv)) { michael@0: aResult.Append(encodedText); michael@0: } michael@0: encodedText.Truncate(); michael@0: prevCharset.Truncate(); michael@0: prevEncoding = '\0'; michael@0: } michael@0: // copy the part before the encoded-word michael@0: CopyRawHeader(begin, p - begin, aDefaultCharset, aResult); michael@0: begin = p; michael@0: } michael@0: michael@0: p += 2; michael@0: michael@0: // Get charset info michael@0: charsetStart = p; michael@0: charsetEnd = 0; michael@0: for (q = p; *q != '?'; q++) { michael@0: if (*q <= ' ' || PL_strchr(especials, *q)) { michael@0: goto badsyntax; michael@0: } michael@0: michael@0: // RFC 2231 section 5 michael@0: if (!charsetEnd && *q == '*') { michael@0: charsetEnd = q; michael@0: } michael@0: } michael@0: if (!charsetEnd) { michael@0: charsetEnd = q; michael@0: } michael@0: michael@0: q++; michael@0: curEncoding = nsCRT::ToUpper(*q); michael@0: if (curEncoding != 'Q' && curEncoding != 'B') michael@0: goto badsyntax; michael@0: michael@0: if (q[1] != '?') michael@0: goto badsyntax; michael@0: michael@0: r = q; michael@0: for (r = q + 2; *r != '?'; r++) { michael@0: if (*r < ' ') goto badsyntax; michael@0: } michael@0: if (r[1] != '=') michael@0: goto badsyntax; michael@0: else if (r == q + 2) { michael@0: // it's empty, skip michael@0: begin = r + 2; michael@0: isLastEncodedWord = 1; michael@0: continue; michael@0: } michael@0: michael@0: curCharset.Assign(charsetStart, charsetEnd - charsetStart); michael@0: // Override charset if requested. Never override labeled UTF-8. michael@0: // Use default charset instead of UNKNOWN-8BIT michael@0: if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8")) michael@0: || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT")) michael@0: ) { michael@0: curCharset = aDefaultCharset; michael@0: } michael@0: michael@0: const char *R; michael@0: R = r; michael@0: if (curEncoding == 'B') { michael@0: // bug 227290. ignore an extraneous '=' at the end. michael@0: // (# of characters in B-encoded part has to be a multiple of 4) michael@0: int32_t n = r - (q + 2); michael@0: R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0; michael@0: } michael@0: // Bug 493544. Don't decode the encoded text until it ends michael@0: if (R[-1] != '=' michael@0: && (prevCharset.IsEmpty() michael@0: || (curCharset == prevCharset && curEncoding == prevEncoding)) michael@0: ) { michael@0: encodedText.Append(q + 2, R - (q + 2)); michael@0: prevCharset = curCharset; michael@0: prevEncoding = curEncoding; michael@0: michael@0: begin = r + 2; michael@0: isLastEncodedWord = 1; michael@0: continue; michael@0: } michael@0: michael@0: bool bDecoded; // If the current line has been decoded. michael@0: bDecoded = false; michael@0: if (!encodedText.IsEmpty()) { michael@0: if (curCharset == prevCharset && curEncoding == prevEncoding) { michael@0: encodedText.Append(q + 2, R - (q + 2)); michael@0: bDecoded = true; michael@0: } michael@0: rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), michael@0: prevEncoding, prevCharset.get(), aResult); michael@0: if (NS_FAILED(rv)) { michael@0: aResult.Append(encodedText); michael@0: } michael@0: encodedText.Truncate(); michael@0: prevCharset.Truncate(); michael@0: prevEncoding = '\0'; michael@0: } michael@0: if (!bDecoded) { michael@0: rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding, michael@0: curCharset.get(), aResult); michael@0: if (NS_FAILED(rv)) { michael@0: aResult.Append(encodedText); michael@0: } michael@0: } michael@0: michael@0: begin = r + 2; michael@0: isLastEncodedWord = 1; michael@0: continue; michael@0: michael@0: badsyntax: michael@0: if (!encodedText.IsEmpty()) { michael@0: rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), michael@0: prevEncoding, prevCharset.get(), aResult); michael@0: if (NS_FAILED(rv)) { michael@0: aResult.Append(encodedText); michael@0: } michael@0: encodedText.Truncate(); michael@0: prevCharset.Truncate(); michael@0: } michael@0: // copy the part before the encoded-word michael@0: aResult.Append(begin, p - begin); michael@0: begin = p; michael@0: isLastEncodedWord = 0; michael@0: } michael@0: michael@0: if (!encodedText.IsEmpty()) { michael@0: rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(), michael@0: prevEncoding, prevCharset.get(), aResult); michael@0: if (NS_FAILED(rv)) { michael@0: aResult.Append(encodedText); michael@0: } michael@0: } michael@0: michael@0: // put the tail back michael@0: CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult); michael@0: michael@0: nsAutoCString tempStr(aResult); michael@0: tempStr.ReplaceChar('\t', ' '); michael@0: aResult = tempStr; michael@0: michael@0: return NS_OK; michael@0: }