The Tor Browser: comparison netwerk/mime/nsMIMEHeaderParamImpl.cpp

--1:000000000000
+:a3cb3a02a8ce
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set sw=4 ts=8 et tw=80 : */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+* License, v. 2.0. If a copy of the MPL was not distributed with this
+* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include <string.h>
+#include "prmem.h"
+#include "prprf.h"
+#include "plstr.h"
+#include "plbase64.h"
+#include "nsCRT.h"
+#include "nsMemory.h"
+#include "nsTArray.h"
+#include "nsCOMPtr.h"
+#include "nsEscape.h"
+#include "nsIUTF8ConverterService.h"
+#include "nsUConvCID.h"
+#include "nsIServiceManager.h"
+#include "nsMIMEHeaderParamImpl.h"
+#include "nsReadableUtils.h"
+#include "nsNativeCharsetUtils.h"
+#include "nsError.h"
+#include "nsIUnicodeDecoder.h"
+#include "mozilla/dom/EncodingUtils.h"
+using mozilla::dom::EncodingUtils;
+// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
+static char *DecodeQ(const char *, uint32_t);
+static bool Is7bitNonAsciiString(const char *, uint32_t);
+static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
+static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
+static nsresult internalDecodeParameter(const nsACString&, const char*,
+const char*, bool, bool, nsACString&);
+// XXX The chance of UTF-7 being used in the message header is really
+// low, but in theory it's possible.
+#define IS_7BIT_NON_ASCII_CHARSET(cset)            \
+(!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
+!nsCRT::strncasecmp((cset), "HZ-GB", 5)    || \
+!nsCRT::strncasecmp((cset), "UTF-7", 5))
+NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
+const char *aParamName,
+const nsACString& aFallbackCharset,
+bool aTryLocaleCharset,
+char **aLang, nsAString& aResult)
+{
+return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
+aFallbackCharset, aTryLocaleCharset, aLang, aResult);
+}
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
+const char *aParamName,
+const nsACString& aFallbackCharset,
+bool aTryLocaleCharset,
+char **aLang, nsAString& aResult)
+{
+return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
+aFallbackCharset, aTryLocaleCharset, aLang, aResult);
+}
+// XXX : aTryLocaleCharset is not yet effective.
+nsresult
+nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
+const char *aParamName,
+ParamDecoding aDecoding,
+const nsACString& aFallbackCharset,
+bool aTryLocaleCharset,
+char **aLang, nsAString& aResult)
+{
+aResult.Truncate();
+nsresult rv;
+// get parameter (decode RFC 2231/5987 when applicable, as specified by
+// aDecoding (5987 being a subset of 2231) and return charset.)
+nsXPIDLCString med;
+nsXPIDLCString charset;
+rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
+aDecoding, getter_Copies(charset), aLang,
+getter_Copies(med));
+if (NS_FAILED(rv))
+return rv;
+// convert to UTF-8 after charset conversion and RFC 2047 decoding
+// if necessary.
+nsAutoCString str1;
+rv = internalDecodeParameter(med, charset.get(), nullptr, false,
+// was aDecoding == MIME_FIELD_ENCODING
+// see bug 875615
+true,
+str1);
+NS_ENSURE_SUCCESS(rv, rv);
+if (!aFallbackCharset.IsEmpty())
+{
+nsAutoCString charset;
+EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset);
+nsAutoCString str2;
+nsCOMPtr<nsIUTF8ConverterService>
+cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
+if (cvtUTF8 &&
+NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1,
+PromiseFlatCString(aFallbackCharset).get(), false,
+!charset.EqualsLiteral("UTF-8"),
+1, str2))) {
+CopyUTF8toUTF16(str2, aResult);
+return NS_OK;
+}
+}
+if (IsUTF8(str1)) {
+CopyUTF8toUTF16(str1, aResult);
+return NS_OK;
+}
+if (aTryLocaleCharset && !NS_IsNativeUTF8())
+return NS_CopyNativeToUnicode(str1, aResult);
+CopyASCIItoUTF16(str1, aResult);
+return NS_OK;
+}
+// remove backslash-encoded sequences from quoted-strings
+// modifies string in place, potentially shortening it
+void RemoveQuotedStringEscapes(char *src)
+{
+char *dst = src;
+for (char *c = src; *c; ++c)
+{
+if (c[0] == '\\' && c[1])
+{
+// skip backslash if not at end
+++c;
+}
+*dst++ = *c;
+}
+*dst = 0;
+}
+// true is character is a hex digit
+bool IsHexDigit(char aChar)
+{
+char c = aChar;
+return (c >= 'a' && c <= 'f') ||
+(c >= 'A' && c <= 'F') ||
+(c >= '0' && c <= '9');
+}
+// validate that a C String containing %-escapes is syntactically valid
+bool IsValidPercentEscaped(const char *aValue, int32_t len)
+{
+for (int32_t i = 0; i < len; i++) {
+if (aValue[i] == '%') {
+if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
+return false;
+}
+}
+}
+return true;
+}
+// Support for continuations (RFC 2231, Section 3)
+// only a sane number supported
+#define MAX_CONTINUATIONS 999
+// part of a continuation
+class Continuation {
+public:
+Continuation(const char *aValue, uint32_t aLength,
+bool aNeedsPercentDecoding, bool aWasQuotedString) {
+value = aValue;
+length = aLength;
+needsPercentDecoding = aNeedsPercentDecoding;
+wasQuotedString = aWasQuotedString;
+}
+Continuation() {
+// empty constructor needed for nsTArray
+value = 0L;
+length = 0;
+needsPercentDecoding = false;
+wasQuotedString = false;
+}
+~Continuation() {}
+const char *value;
+uint32_t length;
+bool needsPercentDecoding;
+bool wasQuotedString;
+};
+// combine segments into a single string, returning the allocated string
+// (or nullptr) while emptying the list
+char *combineContinuations(nsTArray<Continuation>& aArray)
+{
+// Sanity check
+if (aArray.Length() == 0)
+return nullptr;
+// Get an upper bound for the length
+uint32_t length = 0;
+for (uint32_t i = 0; i < aArray.Length(); i++) {
+length += aArray[i].length;
+}
+// Allocate
+char *result = (char *) nsMemory::Alloc(length + 1);
+// Concatenate
+if (result) {
+*result = '\0';
+for (uint32_t i = 0; i < aArray.Length(); i++) {
+Continuation cont = aArray[i];
+if (! cont.value) break;
+char *c = result + strlen(result);
+strncat(result, cont.value, cont.length);
+if (cont.needsPercentDecoding) {
+nsUnescape(c);
+}
+if (cont.wasQuotedString) {
+RemoveQuotedStringEscapes(c);
+}
+}
+// return null if empty value
+if (*result == '\0') {
+nsMemory::Free(result);
+result = nullptr;
+}
+} else {
+// Handle OOM
+NS_WARNING("Out of memory\n");
+}
+return result;
+}
+// add a continuation, return false on error if segment already has been seen
+bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
+const char *aValue, uint32_t aLength,
+bool aNeedsPercentDecoding, bool aWasQuotedString)
+{
+if (aIndex < aArray.Length() && aArray[aIndex].value) {
+NS_WARNING("duplicate RC2231 continuation segment #\n");
+return false;
+}
+if (aIndex > MAX_CONTINUATIONS) {
+NS_WARNING("RC2231 continuation segment # exceeds limit\n");
+return false;
+}
+if (aNeedsPercentDecoding && aWasQuotedString) {
+NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
+return false;
+}
+Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
+if (aArray.Length() <= aIndex) {
+aArray.SetLength(aIndex + 1);
+}
+aArray[aIndex] = cont;
+return true;
+}
+// parse a segment number; return -1 on error
+int32_t parseSegmentNumber(const char *aValue, int32_t aLen)
+{
+if (aLen < 1) {
+NS_WARNING("segment number missing\n");
+return -1;
+}
+if (aLen > 1 && aValue[0] == '0') {
+NS_WARNING("leading '0' not allowed in segment number\n");
+return -1;
+}
+int32_t segmentNumber = 0;
+for (int32_t i = 0; i < aLen; i++) {
+if (! (aValue[i] >= '0' && aValue[i] <= '9')) {
+NS_WARNING("invalid characters in segment number\n");
+return -1;
+}
+segmentNumber *= 10;
+segmentNumber += aValue[i] - '0';
+if (segmentNumber > MAX_CONTINUATIONS) {
+NS_WARNING("Segment number exceeds sane size\n");
+return -1;
+}
+}
+return segmentNumber;
+}
+// validate a given octet sequence for compliance with the specified
+// encoding
+bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
+{
+nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
+(NS_UTF8CONVERTERSERVICE_CONTRACTID));
+if (!cvtUTF8) {
+NS_WARNING("Can't get UTF8ConverterService\n");
+return false;
+}
+nsAutoCString tmpRaw;
+tmpRaw.Assign(aOctets);
+nsAutoCString tmpDecoded;
+nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
+PromiseFlatCString(aCharset).get(),
+false, false, 1, tmpDecoded);
+if (rv != NS_OK) {
+// we can't decode; charset may be unsupported, or the octet sequence
+// is broken (illegal or incomplete octet sequence contained)
+NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
+return false;
+}
+return true;
+}
+// moved almost verbatim from mimehdrs.cpp
+// char *
+// MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
+//                            char **charset, char **language)
+//
+// The format of these header lines  is
+// <token> [ ';' <token> '=' <token-or-quoted-string> ]*
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
+const char *aParamName,
+char **aCharset,
+char **aLang,
+char **aResult)
+{
+return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
+aCharset, aLang, aResult);
+}
+nsresult
+nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
+const char *aParamName,
+ParamDecoding aDecoding,
+char **aCharset,
+char **aLang,
+char **aResult)
+{
+if (!aHeaderValue ||  !*aHeaderValue || !aResult)
+return NS_ERROR_INVALID_ARG;
+*aResult = nullptr;
+if (aCharset) *aCharset = nullptr;
+if (aLang) *aLang = nullptr;
+nsAutoCString charset;
+// change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
+// them for HTTP header fields later on, see bug 776324
+bool acceptContinuations = true;
+const char *str = aHeaderValue;
+// skip leading white space.
+for (; *str &&  nsCRT::IsAsciiSpace(*str); ++str)
+;
+const char *start = str;
+// aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
+// For instance, return 'inline' in the following case:
+// Content-Disposition: inline; filename=.....
+if (!aParamName || !*aParamName)
+{
+for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
+;
+if (str == start)
+return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
+*aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
+NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
+(*aResult)[str - start] = '\0';  // null-terminate
+return NS_OK;
+}
+/* Skip forward to first ';' */
+for (; *str && *str != ';' && *str != ','; ++str)
+;
+if (*str)
+str++;
+/* Skip over following whitespace */
+for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
+;
+// Some broken http servers just specify parameters
+// like 'filename' without specifying disposition
+// method. Rewind to the first non-white-space
+// character.
+if (!*str)
+str = start;
+// RFC2231 - The legitimate parm format can be:
+// A. title=ThisIsTitle
+// B. title*=us-ascii'en-us'This%20is%20wierd.
+// C. title*0*=us-ascii'en'This%20is%20wierd.%20We
+//    title*1*=have%20to%20support%20this.
+//    title*2="Else..."
+// D. title*0="Hey, what you think you are doing?"
+//    title*1="There is no charset and lang info."
+// RFC5987: only A and B
+// collect results for the different algorithms (plain filename,
+// RFC5987/2231-encoded filename, + continuations) separately and decide
+// which to use at the end
+char *caseAResult = nullptr;
+char *caseBResult = nullptr;
+char *caseCDResult = nullptr;
+// collect continuation segments
+nsTArray<Continuation> segments;
+// our copies of the charset parameter, kept separately as they might
+// differ for the two formats
+nsDependentCSubstring charsetB, charsetCD;
+nsDependentCSubstring lang;
+int32_t paramLen = strlen(aParamName);
+while (*str) {
+// find name/value
+const char *nameStart = str;
+const char *nameEnd = nullptr;
+const char *valueStart = str;
+const char *valueEnd = nullptr;
+bool isQuotedString = false;
+NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
+// Skip forward to the end of this token.
+for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
+;
+nameEnd = str;
+int32_t nameLen = nameEnd - nameStart;
+// Skip over whitespace, '=', and whitespace
+while (nsCRT::IsAsciiSpace(*str)) ++str;
+if (!*str) {
+break;
+}
+if (*str++ != '=') {
+// don't accept parameters without "="
+goto increment_str;
+}
+while (nsCRT::IsAsciiSpace(*str)) ++str;
+if (*str != '"') {
+// The value is a token, not a quoted string.
+valueStart = str;
+for (valueEnd = str;
+*valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
+valueEnd++)
+;
+str = valueEnd;
+} else {
+isQuotedString = true;
+++str;
+valueStart = str;
+for (valueEnd = str; *valueEnd; ++valueEnd) {
+if (*valueEnd == '\\' && *(valueEnd + 1))
+++valueEnd;
+else if (*valueEnd == '"')
+break;
+}
+str = valueEnd;
+// *valueEnd != null means that *valueEnd is quote character.
+if (*valueEnd)
+str++;
+}
+// See if this is the simplest case (case A above),
+// a 'single' line value with no charset and lang.
+// If so, copy it and return.
+if (nameLen == paramLen &&
+!nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
+if (caseAResult) {
+// we already have one caseA result, ignore subsequent ones
+goto increment_str;
+}
+// if the parameter spans across multiple lines we have to strip out the
+//     line continuation -- jht 4/29/98
+nsAutoCString tempStr(valueStart, valueEnd - valueStart);
+tempStr.StripChars("\r\n");
+char *res = ToNewCString(tempStr);
+NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
+if (isQuotedString)
+RemoveQuotedStringEscapes(res);
+caseAResult = res;
+// keep going, we may find a RFC 2231/5987 encoded alternative
+}
+// case B, C, and D
+else if (nameLen > paramLen &&
+!nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
+*(nameStart + paramLen) == '*') {
+// 1st char past '*'
+const char *cp = nameStart + paramLen + 1;
+// if param name ends in "*" we need do to RFC5987 "ext-value" decoding
+bool needExtDecoding = *(nameEnd - 1) == '*';
+bool caseB = nameLen == paramLen + 1;
+bool caseCStart = (*cp == '0') && needExtDecoding;
+// parse the segment number
+int32_t segmentNumber = -1;
+if (!caseB) {
+int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
+segmentNumber = parseSegmentNumber(cp, segLen);
+if (segmentNumber == -1) {
+acceptContinuations = false;
+goto increment_str;
+}
+}
+// CaseB and start of CaseC: requires charset and optional language
+// in quotes (quotes required even if lang is blank)
+if (caseB || (caseCStart && acceptContinuations)) {
+// look for single quotation mark(')
+const char *sQuote1 = PL_strchr(valueStart, 0x27);
+const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
+// Two single quotation marks must be present even in
+// absence of charset and lang.
+if (!sQuote1 || !sQuote2) {
+NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
+}
+const char *charsetStart = nullptr;
+int32_t charsetLength = 0;
+const char *langStart = nullptr;
+int32_t langLength = 0;
+const char *rawValStart = nullptr;
+int32_t rawValLength = 0;
+if (sQuote2 && sQuote1) {
+// both delimiters present: charSet'lang'rawVal
+rawValStart = sQuote2 + 1;
+rawValLength = valueEnd - rawValStart;
+langStart = sQuote1 + 1;
+langLength = sQuote2 - langStart;
+charsetStart = valueStart;
+charsetLength = sQuote1 - charsetStart;
+}
+else if (sQuote1) {
+// one delimiter; assume charset'rawVal
+rawValStart = sQuote1 + 1;
+rawValLength = valueEnd - rawValStart;
+charsetStart = valueStart;
+charsetLength = sQuote1 - valueStart;
+}
+else {
+// no delimiter: just rawVal
+rawValStart = valueStart;
+rawValLength = valueEnd - valueStart;
+}
+if (langLength != 0) {
+lang.Assign(langStart, langLength);
+}
+// keep the charset for later
+if (caseB) {
+charsetB.Assign(charsetStart, charsetLength);
+} else {
+// if caseCorD
+charsetCD.Assign(charsetStart, charsetLength);
+}
+// non-empty value part
+if (rawValLength > 0) {
+if (!caseBResult && caseB) {
+if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
+goto increment_str;
+}
+// allocate buffer for the raw value
+char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1);
+if (!tmpResult) {
+goto increment_str;
+}
+*(tmpResult + rawValLength) = 0;
+nsUnescape(tmpResult);
+caseBResult = tmpResult;
+} else {
+// caseC
+bool added = addContinuation(segments, 0, rawValStart,
+rawValLength, needExtDecoding,
+isQuotedString);
+if (!added) {
+// continuation not added, stop processing them
+acceptContinuations = false;
+}
+}
+}
+}  // end of if-block :  title*0*=  or  title*=
+// caseD: a line of multiline param with no need for unescaping : title*[0-9]=
+// or 2nd or later lines of a caseC param : title*[1-9]*=
+else if (acceptContinuations && segmentNumber != -1) {
+uint32_t valueLength = valueEnd - valueStart;
+bool added = addContinuation(segments, segmentNumber, valueStart,
+valueLength, needExtDecoding,
+isQuotedString);
+if (!added) {
+// continuation not added, stop processing them
+acceptContinuations = false;
+}
+} // end of if-block :  title*[0-9]= or title*[1-9]*=
+}
+// str now points after the end of the value.
+//   skip over whitespace, ';', whitespace.
+increment_str:
+while (nsCRT::IsAsciiSpace(*str)) ++str;
+if (*str == ';') {
+++str;
+} else {
+// stop processing the header field; either we are done or the
+// separator was missing
+break;
+}
+while (nsCRT::IsAsciiSpace(*str)) ++str;
+}
+caseCDResult = combineContinuations(segments);
+if (caseBResult && !charsetB.IsEmpty()) {
+// check that the 2231/5987 result decodes properly given the
+// specified character set
+if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
+caseBResult = nullptr;
+}
+if (caseCDResult && !charsetCD.IsEmpty()) {
+// check that the 2231/5987 result decodes properly given the
+// specified character set
+if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
+caseCDResult = nullptr;
+}
+if (caseBResult) {
+// prefer simple 5987 format over 2231 with continuations
+*aResult = caseBResult;
+caseBResult = nullptr;
+charset.Assign(charsetB);
+}
+else if (caseCDResult) {
+// prefer 2231/5987 with or without continuations over plain format
+*aResult = caseCDResult;
+caseCDResult = nullptr;
+charset.Assign(charsetCD);
+}
+else if (caseAResult) {
+*aResult = caseAResult;
+caseAResult = nullptr;
+}
+// free unused stuff
+nsMemory::Free(caseAResult);
+nsMemory::Free(caseBResult);
+nsMemory::Free(caseCDResult);
+// if we have a result
+if (*aResult) {
+// then return charset and lang as well
+if (aLang && !lang.IsEmpty()) {
+uint32_t len = lang.Length();
+*aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1);
+if (*aLang) {
+*(*aLang + len) = 0;
+}
+}
+if (aCharset && !charset.IsEmpty()) {
+uint32_t len = charset.Length();
+*aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1);
+if (*aCharset) {
+*(*aCharset + len) = 0;
+}
+}
+}
+return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
+}
+nsresult
+internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
+bool aOverrideCharset, bool aEatContinuations,
+nsACString& aResult)
+{
+aResult.Truncate();
+if (!aHeaderVal)
+return NS_ERROR_INVALID_ARG;
+if (!*aHeaderVal)
+return NS_OK;
+// If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string  but
+// aDefaultCharset is specified, decodes RFC 2047 encoding and converts
+// to UTF-8. Otherwise, just strips away CRLF.
+if (PL_strstr(aHeaderVal, "=?") ||
+(aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
+Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
+DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
+} else if (aEatContinuations &&
+(PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
+aResult = aHeaderVal;
+} else {
+aEatContinuations = false;
+aResult = aHeaderVal;
+}
+if (aEatContinuations) {
+nsAutoCString temp(aResult);
+temp.ReplaceSubstring("\n\t", " ");
+temp.ReplaceSubstring("\r\t", " ");
+temp.StripChars("\r\n");
+aResult = temp;
+}
+return NS_OK;
+}
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
+const char* aDefaultCharset,
+bool aOverrideCharset,
+bool aEatContinuations,
+nsACString& aResult)
+{
+return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset,
+aOverrideCharset, aEatContinuations,
+aResult);
+}
+// true if the character is allowed in a RFC 5987 value
+// see RFC 5987, Section 3.2.1, "attr-char"
+bool IsRFC5987AttrChar(char aChar)
+{
+char c = aChar;
+return (c >= 'a' && c <= 'z') ||
+(c >= 'A' && c <= 'Z') ||
+(c >= '0' && c <= '9') ||
+(c == '!' || c == '#' || c == '$' || c == '&' ||
+c == '+' || c == '-' || c == '.' || c == '^' ||
+c == '_' || c == '`' || c == '|' || c == '~');
+}
+// percent-decode a value
+// returns false on failure
+bool PercentDecode(nsACString& aValue)
+{
+char *c = (char *) nsMemory::Alloc(aValue.Length() + 1);
+if (!c) {
+return false;
+}
+strcpy(c, PromiseFlatCString(aValue).get());
+nsUnescape(c);
+aValue.Assign(c);
+nsMemory::Free(c);
+return true;
+}
+// Decode a parameter value using the encoding defined in RFC 5987
+//
+// charset  "'" [ language ] "'" value-chars
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
+nsACString& aLang,
+nsAString& aResult)
+{
+nsAutoCString charset;
+nsAutoCString language;
+nsAutoCString value;
+uint32_t delimiters = 0;
+const char *encoded = PromiseFlatCString(aParamVal).get();
+const char *c = encoded;
+while (*c) {
+char tc = *c++;
+if (tc == '\'') {
+// single quote
+delimiters++;
+} else if (((unsigned char)tc) >= 128) {
+// fail early, not ASCII
+NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
+return NS_ERROR_INVALID_ARG;
+} else {
+if (delimiters == 0) {
+// valid characters are checked later implicitly
+charset.Append(tc);
+} else if (delimiters == 1) {
+// no value checking for now
+language.Append(tc);
+} else if (delimiters == 2) {
+if (IsRFC5987AttrChar(tc)) {
+value.Append(tc);
+} else if (tc == '%') {
+if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
+// we expect two more characters
+NS_WARNING("broken %-escape in RFC5987-encoded param");
+return NS_ERROR_INVALID_ARG;
+}
+value.Append(tc);
+// we consume two more
+value.Append(*c++);
+value.Append(*c++);
+} else {
+// character not allowed here
+NS_WARNING("invalid character in RFC5987-encoded param");
+return NS_ERROR_INVALID_ARG;
+}
+}
+}
+}
+if (delimiters != 2) {
+NS_WARNING("missing delimiters in RFC5987-encoded param");
+return NS_ERROR_INVALID_ARG;
+}
+// abort early for unsupported encodings
+if (!charset.LowerCaseEqualsLiteral("utf-8")) {
+NS_WARNING("unsupported charset in RFC5987-encoded param");
+return NS_ERROR_INVALID_ARG;
+}
+// percent-decode
+if (!PercentDecode(value)) {
+return NS_ERROR_OUT_OF_MEMORY;
+}
+// return the encoding
+aLang.Assign(language);
+// finally convert octet sequence to UTF-8 and be done
+nsresult rv = NS_OK;
+nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 =
+do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv);
+NS_ENSURE_SUCCESS(rv, rv);
+nsAutoCString utf8;
+rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8);
+NS_ENSURE_SUCCESS(rv, rv);
+CopyUTF8toUTF16(utf8, aResult);
+return NS_OK;
+}
+nsresult
+internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
+const char* aDefaultCharset, bool aOverrideCharset,
+bool aDecode2047, nsACString& aResult)
+{
+aResult.Truncate();
+// If aCharset is given, aParamValue was obtained from RFC2231/5987
+// encoding and we're pretty sure that it's in aCharset.
+if (aCharset && *aCharset)
+{
+nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
+if (cvtUTF8)
+return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
+true, true, 1, aResult);
+}
+const nsAFlatCString& param = PromiseFlatCString(aParamValue);
+nsAutoCString unQuoted;
+nsACString::const_iterator s, e;
+param.BeginReading(s);
+param.EndReading(e);
+// strip '\' when used to quote CR, LF, '"' and '\'
+for ( ; s != e; ++s) {
+if ((*s == '\\')) {
+if (++s == e) {
+--s; // '\' is at the end. move back and append '\'.
+}
+else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
+--s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
+}
+// else : skip '\' and append the quoted character.
+}
+unQuoted.Append(*s);
+}
+aResult = unQuoted;
+nsresult rv = NS_OK;
+if (aDecode2047) {
+nsAutoCString decoded;
+// Try RFC 2047 encoding, instead.
+rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
+aOverrideCharset, true, decoded);
+if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
+aResult = decoded;
+}
+return rv;
+}
+NS_IMETHODIMP
+nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
+const char* aCharset,
+const char* aDefaultCharset,
+bool aOverrideCharset,
+nsACString& aResult)
+{
+return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
+aOverrideCharset, true, aResult);
+}
+#define ISHEXCHAR(c) \
+((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39)  ||  \
+(0x41 <= uint8_t(c) && uint8_t(c) <= 0x46)  ||  \
+(0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
+// Decode Q encoding (RFC 2047).
+// static
+char *DecodeQ(const char *in, uint32_t length)
+{
+char *out, *dest = 0;
+out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
+if (dest == nullptr)
+return nullptr;
+while (length > 0) {
+unsigned c = 0;
+switch (*in) {
+case '=':
+// check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
+if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
+goto badsyntax;
+PR_sscanf(in + 1, "%2X", &c);
+*out++ = (char) c;
+in += 3;
+length -= 3;
+break;
+case '_':
+*out++ = ' ';
+in++;
+length--;
+break;
+default:
+if (*in & 0x80) goto badsyntax;
+*out++ = *in++;
+length--;
+}
+}
+*out++ = '\0';
+for (out = dest; *out ; ++out) {
+if (*out == '\t')
+*out = ' ';
+}
+return dest;
+badsyntax:
+PR_Free(dest);
+return nullptr;
+}
+// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
+// or has  ESC which may be an  indication that  it's in one of many ISO
+// 2022 7bit  encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
+// static
+bool Is7bitNonAsciiString(const char *input, uint32_t len)
+{
+int32_t c;
+enum { hz_initial, // No HZ seen yet
+hz_escaped, // Inside an HZ ~{ escape sequence
+hz_seen, // Have seen at least one complete HZ sequence
+hz_notpresent // Have seen something that is not legal HZ
+} hz_state;
+hz_state = hz_initial;
+while (len) {
+c = uint8_t(*input++);
+len--;
+if (c & 0x80) return false;
+if (c == 0x1B) return true;
+if (c == '~') {
+switch (hz_state) {
+case hz_initial:
+case hz_seen:
+if (*input == '{') {
+hz_state = hz_escaped;
+} else if (*input == '~') {
+// ~~ is the HZ encoding of ~.  Skip over second ~ as well
+hz_state = hz_seen;
+input++;
+len--;
+} else {
+hz_state = hz_notpresent;
+}
+break;
+case hz_escaped:
+if (*input == '}') hz_state = hz_seen;
+break;
+default:
+break;
+}
+}
+}
+return hz_state == hz_seen;
+}
+#define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
+// copy 'raw' sequences of octets in aInput to aOutput.
+// If aDefaultCharset is specified, the input is assumed to be in the
+// charset and converted to UTF-8. Otherwise, a blind copy is made.
+// If aDefaultCharset is specified, but the conversion to UTF-8
+// is not successful, each octet is replaced by Unicode replacement
+// chars. *aOutput is advanced by the number of output octets.
+// static
+void CopyRawHeader(const char *aInput, uint32_t aLen,
+const char *aDefaultCharset, nsACString &aOutput)
+{
+int32_t c;
+// If aDefaultCharset is not specified, make a blind copy.
+if (!aDefaultCharset || !*aDefaultCharset) {
+aOutput.Append(aInput, aLen);
+return;
+}
+// Copy as long as it's US-ASCII.  An ESC may indicate ISO 2022
+// A ~ may indicate it is HZ
+while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
+aOutput.Append(char(c));
+aLen--;
+}
+if (!aLen) {
+return;
+}
+aInput--;
+// skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
+// string and aDefaultCharset is a 7bit non-ascii charset.
+bool skipCheck = (c == 0x1B || c == '~') &&
+IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
+// If not UTF-8, treat as default charset
+nsCOMPtr<nsIUTF8ConverterService>
+cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
+nsAutoCString utf8Text;
+if (cvtUTF8 &&
+NS_SUCCEEDED(
+cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
+aDefaultCharset, skipCheck, true, 1,
+utf8Text))) {
+aOutput.Append(utf8Text);
+} else { // replace each octet with Unicode replacement char in UTF-8.
+for (uint32_t i = 0; i < aLen; i++) {
+c = uint8_t(*aInput++);
+if (c & 0x80)
+aOutput.Append(REPLACEMENT_CHAR);
+else
+aOutput.Append(char(c));
+}
+}
+}
+nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64,
+const char *aCharset, nsACString &aResult)
+{
+char *decodedText;
+NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
+if(aQOrBase64 == 'Q')
+decodedText = DecodeQ(aEncoded, aLen);
+else if (aQOrBase64 == 'B') {
+decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
+} else {
+return NS_ERROR_INVALID_ARG;
+}
+if (!decodedText) {
+return NS_ERROR_INVALID_ARG;
+}
+nsresult rv;
+nsCOMPtr<nsIUTF8ConverterService>
+cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
+nsAutoCString utf8Text;
+if (NS_SUCCEEDED(rv)) {
+// skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
+rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
+aCharset,
+IS_7BIT_NON_ASCII_CHARSET(aCharset),
+true, 1, utf8Text);
+}
+PR_Free(decodedText);
+if (NS_FAILED(rv)) {
+return rv;
+}
+aResult.Append(utf8Text);
+return NS_OK;
+}
+static const char especials[] = "()<>@,;:\\\"/[]?.=";
+// |decode_mime_part2_str| taken from comi18n.c
+// Decode RFC2047-encoded words in the input and convert the result to UTF-8.
+// If aOverrideCharset is true, charset in RFC2047-encoded words is
+// ignored and aDefaultCharset is assumed, instead. aDefaultCharset
+// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
+//static
+nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset,
+bool aOverrideCharset, nsACString &aResult)
+{
+const char *p, *q = nullptr, *r;
+const char *begin; // tracking pointer for where we are in the input buffer
+int32_t isLastEncodedWord = 0;
+const char *charsetStart, *charsetEnd;
+nsAutoCString prevCharset, curCharset;
+nsAutoCString encodedText;
+char prevEncoding = '\0', curEncoding;
+nsresult rv;
+begin = aHeader;
+// To avoid buffer realloc, if possible, set capacity in advance. No
+// matter what,  more than 3x expansion can never happen for all charsets
+// supported by Mozilla. SCSU/BCSU with the sliding window set to a
+// non-BMP block may be exceptions, but Mozilla does not support them.
+// Neither any known mail/news program use them. Even if there's, we're
+// safe because we don't use a raw *char any more.
+aResult.SetCapacity(3 * strlen(aHeader));
+while ((p = PL_strstr(begin, "=?")) != 0) {
+if (isLastEncodedWord) {
+// See if it's all whitespace.
+for (q = begin; q < p; ++q) {
+if (!PL_strchr(" \t\r\n", *q)) break;
+}
+}
+if (!isLastEncodedWord || q < p) {
+if (!encodedText.IsEmpty()) {
+rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+prevEncoding, prevCharset.get(), aResult);
+if (NS_FAILED(rv)) {
+aResult.Append(encodedText);
+}
+encodedText.Truncate();
+prevCharset.Truncate();
+prevEncoding = '\0';
+}
+// copy the part before the encoded-word
+CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
+begin = p;
+}
+p += 2;
+// Get charset info
+charsetStart = p;
+charsetEnd = 0;
+for (q = p; *q != '?'; q++) {
+if (*q <= ' ' || PL_strchr(especials, *q)) {
+goto badsyntax;
+}
+// RFC 2231 section 5
+if (!charsetEnd && *q == '*') {
+charsetEnd = q;
+}
+}
+if (!charsetEnd) {
+charsetEnd = q;
+}
+q++;
+curEncoding = nsCRT::ToUpper(*q);
+if (curEncoding != 'Q' && curEncoding != 'B')
+goto badsyntax;
+if (q[1] != '?')
+goto badsyntax;
+r = q;
+for (r = q + 2; *r != '?'; r++) {
+if (*r < ' ') goto badsyntax;
+}
+if (r[1] != '=')
+goto badsyntax;
+else if (r == q + 2) {
+// it's empty, skip
+begin = r + 2;
+isLastEncodedWord = 1;
+continue;
+}
+curCharset.Assign(charsetStart, charsetEnd - charsetStart);
+// Override charset if requested.  Never override labeled UTF-8.
+// Use default charset instead of UNKNOWN-8BIT
+if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8"))
+|| (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))
+) {
+curCharset = aDefaultCharset;
+}
+const char *R;
+R = r;
+if (curEncoding == 'B') {
+// bug 227290. ignore an extraneous '=' at the end.
+// (# of characters in B-encoded part has to be a multiple of 4)
+int32_t n = r - (q + 2);
+R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
+}
+// Bug 493544. Don't decode the encoded text until it ends
+if (R[-1] != '='
+&& (prevCharset.IsEmpty()
+|| (curCharset == prevCharset && curEncoding == prevEncoding))
+) {
+encodedText.Append(q + 2, R - (q + 2));
+prevCharset = curCharset;
+prevEncoding = curEncoding;
+begin = r + 2;
+isLastEncodedWord = 1;
+continue;
+}
+bool bDecoded; // If the current line has been decoded.
+bDecoded = false;
+if (!encodedText.IsEmpty()) {
+if (curCharset == prevCharset && curEncoding == prevEncoding) {
+encodedText.Append(q + 2, R - (q + 2));
+bDecoded = true;
+}
+rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+prevEncoding, prevCharset.get(), aResult);
+if (NS_FAILED(rv)) {
+aResult.Append(encodedText);
+}
+encodedText.Truncate();
+prevCharset.Truncate();
+prevEncoding = '\0';
+}
+if (!bDecoded) {
+rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding,
+curCharset.get(), aResult);
+if (NS_FAILED(rv)) {
+aResult.Append(encodedText);
+}
+}
+begin = r + 2;
+isLastEncodedWord = 1;
+continue;
+badsyntax:
+if (!encodedText.IsEmpty()) {
+rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+prevEncoding, prevCharset.get(), aResult);
+if (NS_FAILED(rv)) {
+aResult.Append(encodedText);
+}
+encodedText.Truncate();
+prevCharset.Truncate();
+}
+// copy the part before the encoded-word
+aResult.Append(begin, p - begin);
+begin = p;
+isLastEncodedWord = 0;
+}
+if (!encodedText.IsEmpty()) {
+rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
+prevEncoding, prevCharset.get(), aResult);
+if (NS_FAILED(rv)) {
+aResult.Append(encodedText);
+}
+}
+// put the tail back
+CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
+nsAutoCString tempStr(aResult);
+tempStr.ReplaceChar('\t', ' ');
+aResult = tempStr;
+return NS_OK;
+}

The Tor Browser / file comparison

comparison: netwerk/mime/nsMIMEHeaderParamImpl.cpp

netwerk/mime/nsMIMEHeaderParamImpl.cpp