michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0: /* vim: set sw=4 ts=8 et tw=80 : */
michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0:  * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0:  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0: 
michael@0: #include <string.h>
michael@0: #include "prmem.h"
michael@0: #include "prprf.h"
michael@0: #include "plstr.h"
michael@0: #include "plbase64.h"
michael@0: #include "nsCRT.h"
michael@0: #include "nsMemory.h"
michael@0: #include "nsTArray.h"
michael@0: #include "nsCOMPtr.h"
michael@0: #include "nsEscape.h"
michael@0: #include "nsIUTF8ConverterService.h"
michael@0: #include "nsUConvCID.h"
michael@0: #include "nsIServiceManager.h"
michael@0: #include "nsMIMEHeaderParamImpl.h"
michael@0: #include "nsReadableUtils.h"
michael@0: #include "nsNativeCharsetUtils.h"
michael@0: #include "nsError.h"
michael@0: #include "nsIUnicodeDecoder.h"
michael@0: #include "mozilla/dom/EncodingUtils.h"
michael@0: 
michael@0: using mozilla::dom::EncodingUtils;
michael@0: 
michael@0: // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
michael@0:   
michael@0: static char *DecodeQ(const char *, uint32_t);
michael@0: static bool Is7bitNonAsciiString(const char *, uint32_t);
michael@0: static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
michael@0: static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
michael@0: static nsresult internalDecodeParameter(const nsACString&, const char*,
michael@0:                                         const char*, bool, bool, nsACString&);
michael@0: 
michael@0: // XXX The chance of UTF-7 being used in the message header is really
michael@0: // low, but in theory it's possible. 
michael@0: #define IS_7BIT_NON_ASCII_CHARSET(cset)            \
michael@0:     (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
michael@0:      !nsCRT::strncasecmp((cset), "HZ-GB", 5)    || \
michael@0:      !nsCRT::strncasecmp((cset), "UTF-7", 5))   
michael@0: 
michael@0: NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
michael@0: 
michael@0: NS_IMETHODIMP 
michael@0: nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, 
michael@0:                                     const char *aParamName,
michael@0:                                     const nsACString& aFallbackCharset, 
michael@0:                                     bool aTryLocaleCharset, 
michael@0:                                     char **aLang, nsAString& aResult)
michael@0: {
michael@0:   return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
michael@0:                         aFallbackCharset, aTryLocaleCharset, aLang, aResult);
michael@0: }
michael@0: 
michael@0: NS_IMETHODIMP 
michael@0: nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, 
michael@0:                                         const char *aParamName,
michael@0:                                         const nsACString& aFallbackCharset, 
michael@0:                                         bool aTryLocaleCharset, 
michael@0:                                         char **aLang, nsAString& aResult)
michael@0: {
michael@0:   return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
michael@0:                         aFallbackCharset, aTryLocaleCharset, aLang, aResult);
michael@0: }
michael@0: 
michael@0: // XXX : aTryLocaleCharset is not yet effective.
michael@0: nsresult 
michael@0: nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, 
michael@0:                                       const char *aParamName,
michael@0:                                       ParamDecoding aDecoding,
michael@0:                                       const nsACString& aFallbackCharset, 
michael@0:                                       bool aTryLocaleCharset, 
michael@0:                                       char **aLang, nsAString& aResult)
michael@0: {
michael@0:     aResult.Truncate();
michael@0:     nsresult rv;
michael@0: 
michael@0:     // get parameter (decode RFC 2231/5987 when applicable, as specified by
michael@0:     // aDecoding (5987 being a subset of 2231) and return charset.)
michael@0:     nsXPIDLCString med;
michael@0:     nsXPIDLCString charset;
michael@0:     rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, 
michael@0:                              aDecoding, getter_Copies(charset), aLang, 
michael@0:                              getter_Copies(med));
michael@0:     if (NS_FAILED(rv))
michael@0:         return rv; 
michael@0: 
michael@0:     // convert to UTF-8 after charset conversion and RFC 2047 decoding 
michael@0:     // if necessary.
michael@0:     
michael@0:     nsAutoCString str1;
michael@0:     rv = internalDecodeParameter(med, charset.get(), nullptr, false,
michael@0:                                  // was aDecoding == MIME_FIELD_ENCODING
michael@0:                                  // see bug 875615
michael@0:                                  true,
michael@0:                                  str1);
michael@0:     NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:     if (!aFallbackCharset.IsEmpty())
michael@0:     {
michael@0:         nsAutoCString charset;
michael@0:         EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset);
michael@0:         nsAutoCString str2;
michael@0:         nsCOMPtr<nsIUTF8ConverterService> 
michael@0:           cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0:         if (cvtUTF8 &&
michael@0:             NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, 
michael@0:                 PromiseFlatCString(aFallbackCharset).get(), false,
michael@0:                                    !charset.EqualsLiteral("UTF-8"),
michael@0:                                    1, str2))) {
michael@0:           CopyUTF8toUTF16(str2, aResult);
michael@0:           return NS_OK;
michael@0:         }
michael@0:     }
michael@0: 
michael@0:     if (IsUTF8(str1)) {
michael@0:       CopyUTF8toUTF16(str1, aResult);
michael@0:       return NS_OK;
michael@0:     }
michael@0: 
michael@0:     if (aTryLocaleCharset && !NS_IsNativeUTF8()) 
michael@0:       return NS_CopyNativeToUnicode(str1, aResult);
michael@0: 
michael@0:     CopyASCIItoUTF16(str1, aResult);
michael@0:     return NS_OK;
michael@0: }
michael@0: 
// Collapse the backslash escapes of a quoted-string: every "\x" pair is
// replaced by "x". A backslash that is the last character of the string is
// kept as is. The string is rewritten in place, so it can only get shorter,
// and it is NUL-terminated again afterwards.
void RemoveQuotedStringEscapes(char *src)
{
  const char *in = src;
  char *out = src;

  while (*in != '\0') {
    // An escaping backslash is dropped; the character after it is copied.
    if (*in == '\\' && in[1] != '\0') {
      ++in;
    }
    *out++ = *in++;
  }

  *out = '\0';
}
michael@0: 
// Returns true if aChar is an ASCII hexadecimal digit (0-9, a-f or A-F).
bool IsHexDigit(char aChar)
{
  char c = aChar;

  return (c >= 'a' && c <= 'f') ||
         (c >= 'A' && c <= 'F') ||
         (c >= '0' && c <= '9');
}

// Checks that every '%' within the first |len| characters of |aValue| is
// followed by two hexadecimal digits that also lie within those |len|
// characters.
//
// Returns false as soon as a malformed or truncated escape is found, true
// otherwise (including for an empty range).
bool IsValidPercentEscaped(const char *aValue, int32_t len)
{
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }

    // Both hex digits must be part of the value itself: do not look at
    // whatever happens to follow the range handed in by the caller.
    if (len - i < 3 ||
        !IsHexDigit(aValue[i + 1]) ||
        !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
michael@0: 
// Support for continuations (RFC 2231, Section 3)

// Highest segment number that is accepted; only a sane number of
// continuations is supported.
#define MAX_CONTINUATIONS 999

// One segment of a parameter value that is split into continuations.
// |value| is a borrowed pointer: this class neither copies nor frees the
// characters it points to, and |length| says how many of them belong to the
// segment. The two flags record how the segment has to be post-processed
// when the segments are put together.
class Continuation {
  public:
    Continuation(const char *aValue, uint32_t aLength,
                 bool aNeedsPercentDecoding, bool aWasQuotedString)
      : value(aValue)
      , length(aLength)
      , needsPercentDecoding(aNeedsPercentDecoding)
      , wasQuotedString(aWasQuotedString)
    {
    }

    // Default constructor, needed for nsTArray: creates an empty segment
    // (null value, zero length, both flags cleared).
    Continuation()
      : value(nullptr)
      , length(0)
      , needsPercentDecoding(false)
      , wasQuotedString(false)
    {
    }

    ~Continuation() {}

    const char *value;          // start of the segment's characters
    uint32_t length;            // number of characters in the segment
    bool needsPercentDecoding;  // segment still contains %-escapes
    bool wasQuotedString;       // segment was given as a quoted-string
};
michael@0: 
michael@0: // combine segments into a single string, returning the allocated string
michael@0: // (or nullptr) while emptying the list 
michael@0: char *combineContinuations(nsTArray<Continuation>& aArray)
michael@0: {
michael@0:   // Sanity check
michael@0:   if (aArray.Length() == 0)
michael@0:     return nullptr;
michael@0: 
michael@0:   // Get an upper bound for the length
michael@0:   uint32_t length = 0;
michael@0:   for (uint32_t i = 0; i < aArray.Length(); i++) {
michael@0:     length += aArray[i].length;
michael@0:   }
michael@0: 
michael@0:   // Allocate
michael@0:   char *result = (char *) nsMemory::Alloc(length + 1);
michael@0: 
michael@0:   // Concatenate
michael@0:   if (result) {
michael@0:     *result = '\0';
michael@0: 
michael@0:     for (uint32_t i = 0; i < aArray.Length(); i++) {
michael@0:       Continuation cont = aArray[i];
michael@0:       if (! cont.value) break;
michael@0: 
michael@0:       char *c = result + strlen(result);
michael@0:       strncat(result, cont.value, cont.length);
michael@0:       if (cont.needsPercentDecoding) {
michael@0:         nsUnescape(c);
michael@0:       }
michael@0:       if (cont.wasQuotedString) {
michael@0:         RemoveQuotedStringEscapes(c);
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     // return null if empty value
michael@0:     if (*result == '\0') {
michael@0:       nsMemory::Free(result);
michael@0:       result = nullptr;
michael@0:     }
michael@0:   } else {
michael@0:     // Handle OOM
michael@0:     NS_WARNING("Out of memory\n");
michael@0:   }
michael@0: 
michael@0:   return result;
michael@0: }
michael@0: 
michael@0: // add a continuation, return false on error if segment already has been seen
michael@0: bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
michael@0:                      const char *aValue, uint32_t aLength,
michael@0:                      bool aNeedsPercentDecoding, bool aWasQuotedString)
michael@0: {
michael@0:   if (aIndex < aArray.Length() && aArray[aIndex].value) {
michael@0:     NS_WARNING("duplicate RC2231 continuation segment #\n");
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   if (aIndex > MAX_CONTINUATIONS) {
michael@0:     NS_WARNING("RC2231 continuation segment # exceeds limit\n");
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   if (aNeedsPercentDecoding && aWasQuotedString) {
michael@0:     NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
michael@0: 
michael@0:   if (aArray.Length() <= aIndex) {
michael@0:     aArray.SetLength(aIndex + 1);
michael@0:   }
michael@0:   aArray[aIndex] = cont;
michael@0: 
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // parse a segment number; return -1 on error
michael@0: int32_t parseSegmentNumber(const char *aValue, int32_t aLen)
michael@0: {
michael@0:   if (aLen < 1) {
michael@0:     NS_WARNING("segment number missing\n");
michael@0:     return -1;
michael@0:   }
michael@0: 
michael@0:   if (aLen > 1 && aValue[0] == '0') {
michael@0:     NS_WARNING("leading '0' not allowed in segment number\n");
michael@0:     return -1;
michael@0:   }
michael@0: 
michael@0:   int32_t segmentNumber = 0;
michael@0: 
michael@0:   for (int32_t i = 0; i < aLen; i++) {
michael@0:     if (! (aValue[i] >= '0' && aValue[i] <= '9')) {
michael@0:       NS_WARNING("invalid characters in segment number\n");
michael@0:       return -1;
michael@0:     }
michael@0: 
michael@0:     segmentNumber *= 10;
michael@0:     segmentNumber += aValue[i] - '0';
michael@0:     if (segmentNumber > MAX_CONTINUATIONS) {
michael@0:       NS_WARNING("Segment number exceeds sane size\n");
michael@0:       return -1;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return segmentNumber;
michael@0: }
michael@0: 
michael@0: // validate a given octet sequence for compliance with the specified
michael@0: // encoding
michael@0: bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
michael@0: {
michael@0:   nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
michael@0:     (NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0:   if (!cvtUTF8) {
michael@0:     NS_WARNING("Can't get UTF8ConverterService\n");
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   nsAutoCString tmpRaw;
michael@0:   tmpRaw.Assign(aOctets);
michael@0:   nsAutoCString tmpDecoded;
michael@0: 
michael@0:   nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
michael@0:                                              PromiseFlatCString(aCharset).get(),
michael@0:                                              false, false, 1, tmpDecoded);
michael@0: 
michael@0:   if (rv != NS_OK) {
michael@0:     // we can't decode; charset may be unsupported, or the octet sequence
michael@0:     // is broken (illegal or incomplete octet sequence contained)
michael@0:     NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // moved almost verbatim from mimehdrs.cpp
michael@0: // char *
michael@0: // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
michael@0: //                            char **charset, char **language)
michael@0: //
michael@0: // The format of these header lines  is
michael@0: // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
michael@0: NS_IMETHODIMP 
michael@0: nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, 
michael@0:                                             const char *aParamName,
michael@0:                                             char **aCharset,
michael@0:                                             char **aLang,
michael@0:                                             char **aResult)
michael@0: {
michael@0:   return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
michael@0:                              aCharset, aLang, aResult);
michael@0: }
michael@0: 
michael@0: 
michael@0: nsresult 
michael@0: nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, 
michael@0:                                            const char *aParamName,
michael@0:                                            ParamDecoding aDecoding,
michael@0:                                            char **aCharset,
michael@0:                                            char **aLang,
michael@0:                                            char **aResult)
michael@0: {
michael@0: 
michael@0:   if (!aHeaderValue ||  !*aHeaderValue || !aResult)
michael@0:     return NS_ERROR_INVALID_ARG;
michael@0: 
michael@0:   *aResult = nullptr;
michael@0: 
michael@0:   if (aCharset) *aCharset = nullptr;
michael@0:   if (aLang) *aLang = nullptr;
michael@0: 
michael@0:   nsAutoCString charset;
michael@0: 
michael@0:   // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
michael@0:   // them for HTTP header fields later on, see bug 776324
michael@0:   bool acceptContinuations = true;
michael@0: 
michael@0:   const char *str = aHeaderValue;
michael@0: 
michael@0:   // skip leading white space.
michael@0:   for (; *str &&  nsCRT::IsAsciiSpace(*str); ++str)
michael@0:     ;
michael@0:   const char *start = str;
michael@0:   
michael@0:   // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
michael@0:   // For instance, return 'inline' in the following case:
michael@0:   // Content-Disposition: inline; filename=.....
michael@0:   if (!aParamName || !*aParamName) 
michael@0:     {
michael@0:       for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
michael@0:         ;
michael@0:       if (str == start)
michael@0:         return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
michael@0: 
michael@0:       *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
michael@0:       NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
michael@0:       (*aResult)[str - start] = '\0';  // null-terminate
michael@0:       return NS_OK;
michael@0:     }
michael@0: 
michael@0:   /* Skip forward to first ';' */
michael@0:   for (; *str && *str != ';' && *str != ','; ++str)
michael@0:     ;
michael@0:   if (*str)
michael@0:     str++;
michael@0:   /* Skip over following whitespace */
michael@0:   for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
michael@0:     ;
michael@0: 
michael@0:   // Some broken http servers just specify parameters
michael@0:   // like 'filename' without specifying disposition
michael@0:   // method. Rewind to the first non-white-space
michael@0:   // character.
michael@0:   
michael@0:   if (!*str)
michael@0:     str = start;
michael@0: 
michael@0:   // RFC2231 - The legitimate parm format can be:
michael@0:   // A. title=ThisIsTitle 
michael@0:   // B. title*=us-ascii'en-us'This%20is%20wierd.
michael@0:   // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
michael@0:   //    title*1*=have%20to%20support%20this.
michael@0:   //    title*2="Else..."
michael@0:   // D. title*0="Hey, what you think you are doing?"
michael@0:   //    title*1="There is no charset and lang info."
michael@0:   // RFC5987: only A and B
michael@0:   
michael@0:   // collect results for the different algorithms (plain filename,
michael@0:   // RFC5987/2231-encoded filename, + continuations) separately and decide
michael@0:   // which to use at the end
michael@0:   char *caseAResult = nullptr;
michael@0:   char *caseBResult = nullptr;
michael@0:   char *caseCDResult = nullptr;
michael@0: 
michael@0:   // collect continuation segments
michael@0:   nsTArray<Continuation> segments;
michael@0: 
michael@0: 
michael@0:   // our copies of the charset parameter, kept separately as they might
michael@0:   // differ for the two formats
michael@0:   nsDependentCSubstring charsetB, charsetCD;
michael@0: 
michael@0:   nsDependentCSubstring lang;
michael@0: 
michael@0:   int32_t paramLen = strlen(aParamName);
michael@0: 
michael@0:   while (*str) {
michael@0:     // find name/value
michael@0: 
michael@0:     const char *nameStart = str;
michael@0:     const char *nameEnd = nullptr;
michael@0:     const char *valueStart = str;
michael@0:     const char *valueEnd = nullptr;
michael@0:     bool isQuotedString = false;
michael@0: 
michael@0:     NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
michael@0: 
michael@0:     // Skip forward to the end of this token. 
michael@0:     for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
michael@0:       ;
michael@0:     nameEnd = str;
michael@0: 
michael@0:     int32_t nameLen = nameEnd - nameStart;
michael@0: 
michael@0:     // Skip over whitespace, '=', and whitespace
michael@0:     while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0:     if (!*str) {
michael@0:       break;
michael@0:     }
michael@0:     if (*str++ != '=') {
michael@0:       // don't accept parameters without "="
michael@0:       goto increment_str;
michael@0:     }
michael@0:     while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0: 
michael@0:     if (*str != '"') {
michael@0:       // The value is a token, not a quoted string.
michael@0:       valueStart = str;
michael@0:       for (valueEnd = str;
michael@0:            *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
michael@0:            valueEnd++)
michael@0:         ;
michael@0:       str = valueEnd;
michael@0:     } else {
michael@0:       isQuotedString = true;
michael@0:       
michael@0:       ++str;
michael@0:       valueStart = str;
michael@0:       for (valueEnd = str; *valueEnd; ++valueEnd) {
michael@0:         if (*valueEnd == '\\' && *(valueEnd + 1))
michael@0:           ++valueEnd;
michael@0:         else if (*valueEnd == '"')
michael@0:           break;
michael@0:       }
michael@0:       str = valueEnd;
michael@0:       // *valueEnd != null means that *valueEnd is quote character.
michael@0:       if (*valueEnd)
michael@0:         str++;
michael@0:     }
michael@0: 
michael@0:     // See if this is the simplest case (case A above),
michael@0:     // a 'single' line value with no charset and lang.
michael@0:     // If so, copy it and return.
michael@0:     if (nameLen == paramLen &&
michael@0:         !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
michael@0: 
michael@0:       if (caseAResult) {
michael@0:         // we already have one caseA result, ignore subsequent ones
michael@0:         goto increment_str;
michael@0:       }
michael@0: 
michael@0:       // if the parameter spans across multiple lines we have to strip out the
michael@0:       //     line continuation -- jht 4/29/98 
michael@0:       nsAutoCString tempStr(valueStart, valueEnd - valueStart);
michael@0:       tempStr.StripChars("\r\n");
michael@0:       char *res = ToNewCString(tempStr);
michael@0:       NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
michael@0:       
michael@0:       if (isQuotedString)
michael@0:         RemoveQuotedStringEscapes(res);
michael@0: 
michael@0:       caseAResult = res;
michael@0:       // keep going, we may find a RFC 2231/5987 encoded alternative
michael@0:     }
michael@0:     // case B, C, and D
michael@0:     else if (nameLen > paramLen &&
michael@0:              !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
michael@0:              *(nameStart + paramLen) == '*') {
michael@0: 
michael@0:       // 1st char past '*'       
michael@0:       const char *cp = nameStart + paramLen + 1; 
michael@0: 
michael@0:       // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
michael@0:       bool needExtDecoding = *(nameEnd - 1) == '*';      
michael@0: 
michael@0:       bool caseB = nameLen == paramLen + 1;
michael@0:       bool caseCStart = (*cp == '0') && needExtDecoding;
michael@0: 
michael@0:       // parse the segment number
michael@0:       int32_t segmentNumber = -1;
michael@0:       if (!caseB) {
michael@0:         int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
michael@0:         segmentNumber = parseSegmentNumber(cp, segLen);
michael@0: 
michael@0:         if (segmentNumber == -1) {
michael@0:           acceptContinuations = false;
michael@0:           goto increment_str;
michael@0:         }
michael@0:       }
michael@0: 
michael@0:       // CaseB and start of CaseC: requires charset and optional language
michael@0:       // in quotes (quotes required even if lang is blank)
michael@0:       if (caseB || (caseCStart && acceptContinuations)) {
michael@0:         // look for single quotation mark(')
michael@0:         const char *sQuote1 = PL_strchr(valueStart, 0x27);
michael@0:         const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
michael@0: 
michael@0:         // Two single quotation marks must be present even in
michael@0:         // absence of charset and lang. 
michael@0:         if (!sQuote1 || !sQuote2) {
michael@0:           NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
michael@0:         }
michael@0: 
michael@0:         const char *charsetStart = nullptr;
michael@0:         int32_t charsetLength = 0;
michael@0:         const char *langStart = nullptr;
michael@0:         int32_t langLength = 0;
michael@0:         const char *rawValStart = nullptr;
michael@0:         int32_t rawValLength = 0;
michael@0: 
michael@0:         if (sQuote2 && sQuote1) {
michael@0:           // both delimiters present: charSet'lang'rawVal
michael@0:           rawValStart = sQuote2 + 1;
michael@0:           rawValLength = valueEnd - rawValStart;
michael@0: 
michael@0:           langStart = sQuote1 + 1;
michael@0:           langLength = sQuote2 - langStart;
michael@0: 
michael@0:           charsetStart = valueStart;
michael@0:           charsetLength = sQuote1 - charsetStart;
michael@0:         }
michael@0:         else if (sQuote1) {
michael@0:           // one delimiter; assume charset'rawVal
michael@0:           rawValStart = sQuote1 + 1;
michael@0:           rawValLength = valueEnd - rawValStart;
michael@0: 
michael@0:           charsetStart = valueStart;
michael@0:           charsetLength = sQuote1 - valueStart;
michael@0:         }
michael@0:         else {
michael@0:           // no delimiter: just rawVal
michael@0:           rawValStart = valueStart;
michael@0:           rawValLength = valueEnd - valueStart;
michael@0:         }
michael@0: 
michael@0:         if (langLength != 0) {
michael@0:           lang.Assign(langStart, langLength);
michael@0:         }
michael@0: 
michael@0:         // keep the charset for later
michael@0:         if (caseB) {
michael@0:           charsetB.Assign(charsetStart, charsetLength);
michael@0:         } else {
michael@0:           // if caseCorD
michael@0:           charsetCD.Assign(charsetStart, charsetLength);
michael@0:         }
michael@0: 
michael@0:         // non-empty value part
michael@0:         if (rawValLength > 0) {
michael@0:           if (!caseBResult && caseB) {
michael@0:             if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
michael@0:               goto increment_str;
michael@0:             }
michael@0: 
michael@0:             // allocate buffer for the raw value
michael@0:             char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1);
michael@0:             if (!tmpResult) {
michael@0:               goto increment_str;
michael@0:             }
michael@0:             *(tmpResult + rawValLength) = 0;
michael@0: 
michael@0:             nsUnescape(tmpResult);
michael@0:             caseBResult = tmpResult;
michael@0:           } else {
michael@0:             // caseC
michael@0:             bool added = addContinuation(segments, 0, rawValStart,
michael@0:                                          rawValLength, needExtDecoding,
michael@0:                                          isQuotedString);
michael@0: 
michael@0:             if (!added) {
michael@0:               // continuation not added, stop processing them
michael@0:               acceptContinuations = false;
michael@0:             }
michael@0:           }
michael@0:         }
michael@0:       }  // end of if-block :  title*0*=  or  title*= 
michael@0:       // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
michael@0:       // or 2nd or later lines of a caseC param : title*[1-9]*= 
michael@0:       else if (acceptContinuations && segmentNumber != -1) {
michael@0:         uint32_t valueLength = valueEnd - valueStart;
michael@0: 
michael@0:         bool added = addContinuation(segments, segmentNumber, valueStart,
michael@0:                                      valueLength, needExtDecoding,
michael@0:                                      isQuotedString);
michael@0: 
michael@0:         if (!added) {
michael@0:           // continuation not added, stop processing them
michael@0:           acceptContinuations = false;
michael@0:         }
michael@0:       } // end of if-block :  title*[0-9]= or title*[1-9]*=
michael@0:     }
michael@0: 
michael@0:     // str now points after the end of the value.
michael@0:     //   skip over whitespace, ';', whitespace.
michael@0: increment_str:      
michael@0:     while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0:     if (*str == ';') {
michael@0:       ++str;
michael@0:     } else {
michael@0:       // stop processing the header field; either we are done or the
michael@0:       // separator was missing
michael@0:       break;
michael@0:     }
michael@0:     while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0:   }
michael@0: 
michael@0:   caseCDResult = combineContinuations(segments);
michael@0: 
michael@0:   if (caseBResult && !charsetB.IsEmpty()) {
michael@0:     // check that the 2231/5987 result decodes properly given the
michael@0:     // specified character set
michael@0:     if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
michael@0:       caseBResult = nullptr;
michael@0:   }
michael@0: 
michael@0:   if (caseCDResult && !charsetCD.IsEmpty()) {
michael@0:     // check that the 2231/5987 result decodes properly given the
michael@0:     // specified character set
michael@0:     if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
michael@0:       caseCDResult = nullptr;
michael@0:   }
michael@0: 
michael@0:   if (caseBResult) {
michael@0:     // prefer simple 5987 format over 2231 with continuations
michael@0:     *aResult = caseBResult;
michael@0:     caseBResult = nullptr;
michael@0:     charset.Assign(charsetB);
michael@0:   }
michael@0:   else if (caseCDResult) {
michael@0:     // prefer 2231/5987 with or without continuations over plain format
michael@0:     *aResult = caseCDResult;
michael@0:     caseCDResult = nullptr;
michael@0:     charset.Assign(charsetCD);
michael@0:   }
michael@0:   else if (caseAResult) {
michael@0:     *aResult = caseAResult;
michael@0:     caseAResult = nullptr;
michael@0:   }
michael@0: 
michael@0:   // free unused stuff
michael@0:   nsMemory::Free(caseAResult);
michael@0:   nsMemory::Free(caseBResult);
michael@0:   nsMemory::Free(caseCDResult);
michael@0: 
michael@0:   // if we have a result
michael@0:   if (*aResult) {
michael@0:     // then return charset and lang as well
michael@0:     if (aLang && !lang.IsEmpty()) {
michael@0:       uint32_t len = lang.Length();
michael@0:       *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1);
michael@0:       if (*aLang) {
michael@0:         *(*aLang + len) = 0;
michael@0:       }
michael@0:    }
michael@0:     if (aCharset && !charset.IsEmpty()) {
michael@0:       uint32_t len = charset.Length();
michael@0:       *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1);
michael@0:       if (*aCharset) {
michael@0:         *(*aCharset + len) = 0;
michael@0:       }
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
michael@0: }
michael@0: 
michael@0: // Turns a raw header value into UTF-8 and stores it in aResult.
michael@0: //
michael@0: //   aHeaderVal         NUL-terminated header value. A null pointer is rejected
michael@0: //                      with NS_ERROR_INVALID_ARG; an empty string produces an
michael@0: //                      empty aResult and NS_OK.
michael@0: //   aDefaultCharset    passed on to DecodeRFC2047Str; may be null.
michael@0: //   aOverrideCharset   passed on to DecodeRFC2047Str.
michael@0: //   aEatContinuations  when true, "\n\t" and "\r\t" are replaced by a single
michael@0: //                      space and any remaining CR/LF characters are removed.
michael@0: nsresult
michael@0: internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
michael@0:                             bool aOverrideCharset, bool aEatContinuations,
michael@0:                             nsACString& aResult)
michael@0: {
michael@0:   aResult.Truncate();
michael@0:   if (!aHeaderVal) {
michael@0:     return NS_ERROR_INVALID_ARG;
michael@0:   }
michael@0:   if (*aHeaderVal == '\0') {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // The RFC 2047 decoder is needed when the value contains an encoded-word
michael@0:   // marker, or when a default charset is given and the value is either not
michael@0:   // valid UTF-8 or looks like a 7bit non-ASCII (ISO-2022 / HZ) string.
michael@0:   bool needsDecoding = PL_strstr(aHeaderVal, "=?") != nullptr;
michael@0:   if (!needsDecoding && aDefaultCharset) {
michael@0:     needsDecoding = !IsUTF8(nsDependentCString(aHeaderVal)) ||
michael@0:                     Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal));
michael@0:   }
michael@0: 
michael@0:   if (needsDecoding) {
michael@0:     DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
michael@0:   } else {
michael@0:     aResult = aHeaderVal;
michael@0:     // A value without CR or LF has nothing to unfold, so skip that pass.
michael@0:     if (aEatContinuations &&
michael@0:         !PL_strchr(aHeaderVal, '\n') && !PL_strchr(aHeaderVal, '\r')) {
michael@0:       aEatContinuations = false;
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (!aEatContinuations) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   nsAutoCString unfolded(aResult);
michael@0:   unfolded.ReplaceSubstring("\n\t", " ");
michael@0:   unfolded.ReplaceSubstring("\r\t", " ");
michael@0:   unfolded.StripChars("\r\n");
michael@0:   aResult = unfolded;
michael@0: 
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: // Member-function wrapper: forwards every argument unchanged to
michael@0: // internalDecodeRFC2047Header() and returns its result.
michael@0: NS_IMETHODIMP
michael@0: nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, 
michael@0:                                            const char* aDefaultCharset, 
michael@0:                                            bool aOverrideCharset, 
michael@0:                                            bool aEatContinuations,
michael@0:                                            nsACString& aResult)
michael@0: {
michael@0:   const nsresult rv =
michael@0:     internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset, aOverrideCharset,
michael@0:                                 aEatContinuations, aResult);
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: // Returns true when aChar may appear unescaped in an RFC 5987 value,
michael@0: // i.e. it is an "attr-char" (RFC 5987, Section 3.2.1): an ASCII letter,
michael@0: // an ASCII digit, or one of  ! # $ & + - . ^ _ ` | ~
michael@0: bool IsRFC5987AttrChar(char aChar)
michael@0: {
michael@0:   if (aChar >= 'a' && aChar <= 'z') {
michael@0:     return true;
michael@0:   }
michael@0:   if (aChar >= 'A' && aChar <= 'Z') {
michael@0:     return true;
michael@0:   }
michael@0:   if (aChar >= '0' && aChar <= '9') {
michael@0:     return true;
michael@0:   }
michael@0: 
michael@0:   switch (aChar) {
michael@0:     case '!': case '#': case '$': case '&':
michael@0:     case '+': case '-': case '.': case '^':
michael@0:     case '_': case '`': case '|': case '~':
michael@0:       return true;
michael@0:     default:
michael@0:       return false;
michael@0:   }
michael@0: }
michael@0: 
michael@0: // Percent-decodes aValue in place, using nsUnescape on a scratch copy.
michael@0: // Returns false when the scratch buffer cannot be allocated (aValue is then
michael@0: // left untouched), true otherwise.
michael@0: bool PercentDecode(nsACString& aValue)
michael@0: {
michael@0:   char *scratch = static_cast<char *>(nsMemory::Alloc(aValue.Length() + 1));
michael@0:   if (scratch == nullptr) {
michael@0:     return false;
michael@0:   }
michael@0: 
michael@0:   // Unescaping happens inside the buffer handed to nsUnescape, so it is done
michael@0:   // on a private C-string copy which is then assigned back.
michael@0:   strcpy(scratch, PromiseFlatCString(aValue).get());
michael@0:   nsUnescape(scratch);
michael@0:   aValue.Assign(scratch);
michael@0:   nsMemory::Free(scratch);
michael@0: 
michael@0:   return true;
michael@0: }
michael@0: 
michael@0: // Decode a parameter value using the encoding defined in RFC 5987:
michael@0: //
michael@0: //   charset  "'" [ language ] "'" value-chars
michael@0: //
michael@0: //   aParamVal  the encoded value; must be pure US-ASCII.
michael@0: //   aLang      receives the (unvalidated) language part.
michael@0: //   aResult    receives the decoded value as UTF-16.
michael@0: //
michael@0: // Returns NS_ERROR_INVALID_ARG when the input contains a non-ASCII octet,
michael@0: // a character that is neither an attr-char nor a well-formed %-escape in the
michael@0: // value part, does not contain exactly two "'" delimiters, or names a
michael@0: // charset other than UTF-8. Returns NS_ERROR_OUT_OF_MEMORY when
michael@0: // percent-decoding cannot allocate its buffer. Failures of the UTF-8
michael@0: // converter service are propagated.
michael@0: NS_IMETHODIMP 
michael@0: nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
michael@0:                                           nsACString& aLang,
michael@0:                                           nsAString& aResult)
michael@0: {
michael@0:   nsAutoCString charset;
michael@0:   nsAutoCString language;
michael@0:   nsAutoCString value;
michael@0: 
michael@0:   uint32_t delimiters = 0;
michael@0:   // Bind the flattened string to a reference so that it stays alive for the
michael@0:   // whole scan. Taking .get() directly from the PromiseFlatCString temporary
michael@0:   // would leave |c| dangling once that temporary is destroyed.
michael@0:   const nsAFlatCString& encoded = PromiseFlatCString(aParamVal);
michael@0:   const char *c = encoded.get();
michael@0: 
michael@0:   while (*c) {
michael@0:     char tc = *c++;
michael@0: 
michael@0:     if (tc == '\'') {
michael@0:       // single quote
michael@0:       delimiters++;
michael@0:     } else if (((unsigned char)tc) >= 128) {
michael@0:       // fail early, not ASCII
michael@0:       NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
michael@0:       return NS_ERROR_INVALID_ARG;
michael@0:     } else {
michael@0:       if (delimiters == 0) {
michael@0:         // valid characters are checked later implicitly
michael@0:         charset.Append(tc);
michael@0:       } else if (delimiters == 1) {
michael@0:         // no value checking for now
michael@0:         language.Append(tc);
michael@0:       } else if (delimiters == 2) {
michael@0:         if (IsRFC5987AttrChar(tc)) {
michael@0:           value.Append(tc);
michael@0:         } else if (tc == '%') {
michael@0:           // The terminating NUL is not a hex digit, so these checks also
michael@0:           // keep the two reads below inside the string.
michael@0:           if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
michael@0:             // we expect two more characters
michael@0:             NS_WARNING("broken %-escape in RFC5987-encoded param");
michael@0:             return NS_ERROR_INVALID_ARG;
michael@0:           }
michael@0:           value.Append(tc);
michael@0:           // we consume two more
michael@0:           value.Append(*c++);
michael@0:           value.Append(*c++);
michael@0:         } else {
michael@0:           // character not allowed here
michael@0:           NS_WARNING("invalid character in RFC5987-encoded param");
michael@0:           return NS_ERROR_INVALID_ARG;
michael@0:         }
michael@0:       }
michael@0:       // characters after a third delimiter are ignored here; the delimiter
michael@0:       // count check below rejects such input
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   if (delimiters != 2) {
michael@0:     NS_WARNING("missing delimiters in RFC5987-encoded param");
michael@0:     return NS_ERROR_INVALID_ARG;
michael@0:   }
michael@0: 
michael@0:   // abort early for unsupported encodings
michael@0:   if (!charset.LowerCaseEqualsLiteral("utf-8")) {
michael@0:     NS_WARNING("unsupported charset in RFC5987-encoded param");
michael@0:     return NS_ERROR_INVALID_ARG;
michael@0:   }
michael@0: 
michael@0:   // percent-decode
michael@0:   if (!PercentDecode(value)) {
michael@0:     return NS_ERROR_OUT_OF_MEMORY;
michael@0:   }
michael@0: 
michael@0:   // return the language
michael@0:   aLang.Assign(language);
michael@0: 
michael@0:   // finally convert octet sequence to UTF-8 and be done
michael@0:   nsresult rv = NS_OK;
michael@0:   nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 =
michael@0:     do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   nsAutoCString utf8;
michael@0:   rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8);
michael@0:   NS_ENSURE_SUCCESS(rv, rv);
michael@0: 
michael@0:   CopyUTF8toUTF16(utf8, aResult);
michael@0:   return NS_OK;
michael@0: }
michael@0: 
michael@0: // Decodes a single header parameter value into UTF-8 (stored in aResult).
michael@0: //
michael@0: //   aParamValue       the raw parameter value.
michael@0: //   aCharset          when non-empty, the value is taken to come from
michael@0: //                     RFC 2231/5987 decoding and is converted from this
michael@0: //                     charset, provided the converter service is available.
michael@0: //   aDefaultCharset   forwarded to internalDecodeRFC2047Header.
michael@0: //   aOverrideCharset  forwarded to internalDecodeRFC2047Header.
michael@0: //   aDecode2047       when true, RFC 2047 decoding is attempted on the
michael@0: //                     un-quoted value.
michael@0: nsresult 
michael@0: internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
michael@0:                         const char* aDefaultCharset, bool aOverrideCharset,
michael@0:                         bool aDecode2047, nsACString& aResult)
michael@0: {
michael@0:   aResult.Truncate();
michael@0: 
michael@0:   // If aCharset is given, aParamValue was obtained from RFC2231/5987
michael@0:   // encoding and we're pretty sure that it's in aCharset.
michael@0:   if (aCharset && *aCharset) {
michael@0:     nsCOMPtr<nsIUTF8ConverterService> converter(
michael@0:       do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0:     if (converter) {
michael@0:       return converter->ConvertStringToUTF8(aParamValue, aCharset,
michael@0:                                             true, true, 1, aResult);
michael@0:     }
michael@0:     // no converter service: fall through to the generic handling below
michael@0:   }
michael@0: 
michael@0:   const nsAFlatCString& flat = PromiseFlatCString(aParamValue);
michael@0:   nsACString::const_iterator cur, end;
michael@0:   flat.BeginReading(cur);
michael@0:   flat.EndReading(end);
michael@0: 
michael@0:   // Remove a '\' only when it quotes CR, LF, '"' or '\'. A backslash in any
michael@0:   // other position (including the very end) is kept literally.
michael@0:   nsAutoCString unQuoted;
michael@0:   while (cur != end) {
michael@0:     char ch = *cur;
michael@0:     ++cur;
michael@0:     if (ch == '\\' && cur != end) {
michael@0:       const char quoted = *cur;
michael@0:       if (quoted == nsCRT::CR || quoted == nsCRT::LF ||
michael@0:           quoted == '"' || quoted == '\\') {
michael@0:         // drop the backslash and emit the quoted character instead
michael@0:         ch = quoted;
michael@0:         ++cur;
michael@0:       }
michael@0:     }
michael@0:     unQuoted.Append(ch);
michael@0:   }
michael@0: 
michael@0:   aResult = unQuoted;
michael@0:   if (!aDecode2047) {
michael@0:     return NS_OK;
michael@0:   }
michael@0: 
michael@0:   // Try RFC 2047 encoding, instead. The un-quoted value is kept when the
michael@0:   // decoder fails or yields nothing.
michael@0:   nsAutoCString decoded;
michael@0:   const nsresult rv =
michael@0:     internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
michael@0:                                 aOverrideCharset, true, decoded);
michael@0:   if (NS_SUCCEEDED(rv) && !decoded.IsEmpty()) {
michael@0:     aResult = decoded;
michael@0:   }
michael@0: 
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: // Member-function wrapper around internalDecodeParameter() that always
michael@0: // requests RFC 2047 decoding (aDecode2047 == true).
michael@0: NS_IMETHODIMP
michael@0: nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
michael@0:                                        const char* aCharset,
michael@0:                                        const char* aDefaultCharset,
michael@0:                                        bool aOverrideCharset,
michael@0:                                        nsACString& aResult)
michael@0: {
michael@0:   const bool decode2047 = true;
michael@0:   return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
michael@0:                                  aOverrideCharset, decode2047, aResult);
michael@0: }
michael@0: 
michael@0: // True when |c| is an ASCII hexadecimal digit: 0-9 (0x30-0x39),
michael@0: // A-F (0x41-0x46) or a-f (0x61-0x66). |c| is evaluated more than once.
michael@0: #define ISHEXCHAR(c) \
michael@0:         ((uint8_t(c) >= 0x30 && uint8_t(c) <= 0x39)  ||  \
michael@0:          (uint8_t(c) >= 0x41 && uint8_t(c) <= 0x46)  ||  \
michael@0:          (uint8_t(c) >= 0x61 && uint8_t(c) <= 0x66))
michael@0: 
michael@0: // Decode Q encoding (RFC 2047).
michael@0: //
michael@0: // Reads |length| octets from |in| and returns a newly allocated,
michael@0: // NUL-terminated buffer (release it with PR_Free):
michael@0: //   "=hh" (h in [0-9a-fA-F]) becomes the octet with that hex value,
michael@0: //   "_" becomes a space, and any other 7bit octet is copied as is.
michael@0: // Afterwards every tab up to the first NUL of the output is turned into a
michael@0: // space. Returns nullptr on allocation failure, on a malformed "=" escape,
michael@0: // or when an octet with the high bit set is found.
michael@0: // static
michael@0: char *DecodeQ(const char *in, uint32_t length)
michael@0: {
michael@0:   char *decoded = (char *)PR_Calloc(length + 1, sizeof(char));
michael@0:   if (!decoded)
michael@0:     return nullptr;
michael@0: 
michael@0:   char *writer = decoded;
michael@0:   uint32_t remaining = length;
michael@0:   while (remaining > 0) {
michael@0:     const char ch = *in;
michael@0: 
michael@0:     if (ch == '=') {
michael@0:       // check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
michael@0:       if (remaining < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2])) {
michael@0:         PR_Free(decoded);
michael@0:         return nullptr;
michael@0:       }
michael@0:       unsigned octet = 0;
michael@0:       PR_sscanf(in + 1, "%2X", &octet);
michael@0:       *writer++ = (char) octet;
michael@0:       in += 3;
michael@0:       remaining -= 3;
michael@0:     } else if (ch == '_') {
michael@0:       *writer++ = ' ';
michael@0:       ++in;
michael@0:       --remaining;
michael@0:     } else {
michael@0:       // raw 8bit octets are not allowed inside Q-encoded text
michael@0:       if (ch & 0x80) {
michael@0:         PR_Free(decoded);
michael@0:         return nullptr;
michael@0:       }
michael@0:       *writer++ = ch;
michael@0:       ++in;
michael@0:       --remaining;
michael@0:     }
michael@0:   }
michael@0:   *writer = '\0';
michael@0: 
michael@0:   // tabs in the decoded text become plain spaces
michael@0:   for (char *scan = decoded; *scan; ++scan) {
michael@0:     if (*scan == '\t')
michael@0:       *scan = ' ';
michael@0:   }
michael@0: 
michael@0:   return decoded;
michael@0: }
michael@0: 
michael@0: // Check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842)
michael@0: // or has ESC, which may be an indication that it's in one of many ISO
michael@0: // 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
michael@0: //
michael@0: //   input  the octets to inspect; only the first |len| of them are read.
michael@0: //   len    number of octets in |input|.
michael@0: //
michael@0: // Returns false as soon as an octet with the high bit set is found, true as
michael@0: // soon as an ESC (0x1B) is found, and otherwise true only if the scan ends
michael@0: // having seen a complete HZ sequence ("~~", or "~}" after "~{") with no
michael@0: // illegal '~' usage after it.
michael@0: // static
michael@0: bool Is7bitNonAsciiString(const char *input, uint32_t len)
michael@0: {
michael@0:   int32_t c;
michael@0: 
michael@0:   enum { hz_initial, // No HZ seen yet
michael@0:          hz_escaped, // Inside an HZ ~{ escape sequence 
michael@0:          hz_seen, // Have seen at least one complete HZ sequence 
michael@0:          hz_notpresent // Have seen something that is not legal HZ
michael@0:   } hz_state;
michael@0: 
michael@0:   hz_state = hz_initial;
michael@0:   while (len) {
michael@0:     c = uint8_t(*input++);
michael@0:     len--;
michael@0:     if (c & 0x80) return false;
michael@0:     if (c == 0x1B) return true;
michael@0:     if (c != '~') continue;
michael@0: 
michael@0:     // Only look at the octet after '~' when it is still inside the buffer.
michael@0:     // A trailing '~' is treated as if it were followed by NUL, which is what
michael@0:     // NUL-terminated callers have always observed. Without this guard a
michael@0:     // trailing "~" followed by '~' just past |len| would wrap |len| around.
michael@0:     const char next = len ? *input : '\0';
michael@0: 
michael@0:     switch (hz_state) {
michael@0:     case hz_initial:
michael@0:     case hz_seen:
michael@0:       if (next == '{') {
michael@0:         hz_state = hz_escaped;
michael@0:       } else if (next == '~') {
michael@0:         // ~~ is the HZ encoding of ~.  Skip over second ~ as well
michael@0:         // (next == '~' implies len > 0, so the decrement cannot wrap)
michael@0:         hz_state = hz_seen;
michael@0:         input++;
michael@0:         len--;
michael@0:       } else {
michael@0:         hz_state = hz_notpresent;
michael@0:       }
michael@0:       break;
michael@0: 
michael@0:     case hz_escaped:
michael@0:       if (next == '}') hz_state = hz_seen;
michael@0:       break;
michael@0:     default:
michael@0:       break;
michael@0:     }
michael@0:   }
michael@0:   return hz_state == hz_seen;
michael@0: }
michael@0: 
michael@0: #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
michael@0: 
michael@0: // Appends the 'raw' octets aInput[0 .. aLen) to aOutput.
michael@0: // Without a default charset (null or empty) the octets are appended as is.
michael@0: // Otherwise the leading run of plain US-ASCII is appended unchanged and the
michael@0: // remainder, starting at the first ESC, '~' or 8bit octet, is converted from
michael@0: // aDefaultCharset to UTF-8. If that conversion is not possible, every 8bit
michael@0: // octet of the remainder is replaced by the Unicode replacement character
michael@0: // while 7bit octets are kept.
michael@0: // static
michael@0: void CopyRawHeader(const char *aInput, uint32_t aLen, 
michael@0:                    const char *aDefaultCharset, nsACString &aOutput)
michael@0: {
michael@0:   // If aDefaultCharset is not specified, make a blind copy.
michael@0:   if (!aDefaultCharset || !*aDefaultCharset) {
michael@0:     aOutput.Append(aInput, aLen);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Copy as long as it's US-ASCII.  An ESC may indicate ISO 2022,
michael@0:   // a ~ may indicate it is HZ. On exit aInput points at the octet that
michael@0:   // stopped the run and aLen counts the octets from there on.
michael@0:   int32_t stopper = 0;
michael@0:   for ( ; aLen; ++aInput, --aLen) {
michael@0:     stopper = uint8_t(*aInput);
michael@0:     if (stopper == 0x1B || stopper == '~' || (stopper & 0x80)) {
michael@0:       break;
michael@0:     }
michael@0:     aOutput.Append(char(stopper));
michael@0:   }
michael@0:   if (!aLen) {
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // skip ASCIIness/UTF8ness test if aInput is suspected to be a 7bit
michael@0:   // non-ascii string and aDefaultCharset is a 7bit non-ascii charset.
michael@0:   const bool skipCheck = (stopper == 0x1B || stopper == '~') &&
michael@0:                          IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
michael@0: 
michael@0:   // If not UTF-8, treat as default charset
michael@0:   nsCOMPtr<nsIUTF8ConverterService> 
michael@0:     cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0:   nsAutoCString utf8Text;
michael@0:   const bool converted =
michael@0:     cvtUTF8 &&
michael@0:     NS_SUCCEEDED(
michael@0:       cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
michael@0:                                    aDefaultCharset, skipCheck, true, 1,
michael@0:                                    utf8Text));
michael@0:   if (converted) {
michael@0:     aOutput.Append(utf8Text);
michael@0:     return;
michael@0:   }
michael@0: 
michael@0:   // Conversion unavailable or failed: replace each 8bit octet with the
michael@0:   // Unicode replacement char in UTF-8 and keep the 7bit ones.
michael@0:   const char *stop = aInput + aLen;
michael@0:   for (const char *cur = aInput; cur != stop; ++cur) {
michael@0:     if (uint8_t(*cur) & 0x80) {
michael@0:       aOutput.Append(REPLACEMENT_CHAR);
michael@0:     } else {
michael@0:       aOutput.Append(char(*cur));
michael@0:     }
michael@0:   }
michael@0: }
michael@0: 
michael@0: // Decodes the payload of an RFC 2047 encoded-word and appends the result,
michael@0: // converted from aCharset to UTF-8, to aResult.
michael@0: //
michael@0: //   aEncoded    the still-encoded payload.
michael@0: //   aLen        number of octets in aEncoded.
michael@0: //   aQOrBase64  'Q' selects DecodeQ, 'B' selects PL_Base64Decode.
michael@0: //   aCharset    charset of the decoded octets.
michael@0: //
michael@0: // Returns NS_ERROR_INVALID_ARG for an unknown encoding letter or when the
michael@0: // payload cannot be decoded, and the failure code when the converter service
michael@0: // is unavailable or the conversion fails. aResult is only touched on success.
michael@0: nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64,
michael@0:                             const char *aCharset, nsACString &aResult)
michael@0: {
michael@0:   NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
michael@0: 
michael@0:   char *decodedText = nullptr;
michael@0:   switch (aQOrBase64) {
michael@0:     case 'Q':
michael@0:       decodedText = DecodeQ(aEncoded, aLen);
michael@0:       break;
michael@0:     case 'B':
michael@0:       decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
michael@0:       break;
michael@0:     default:
michael@0:       return NS_ERROR_INVALID_ARG;
michael@0:   }
michael@0:   if (!decodedText) {
michael@0:     return NS_ERROR_INVALID_ARG;
michael@0:   }
michael@0: 
michael@0:   nsAutoCString utf8Text;
michael@0:   nsresult rv;
michael@0:   nsCOMPtr<nsIUTF8ConverterService>
michael@0:     cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
michael@0:   if (NS_SUCCEEDED(rv)) {
michael@0:     // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
michael@0:     const bool skipCheck = IS_7BIT_NON_ASCII_CHARSET(aCharset);
michael@0:     rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
michael@0:                                       aCharset, skipCheck, true, 1, utf8Text);
michael@0:   }
michael@0: 
michael@0:   // the decoded buffer is no longer needed, whatever the outcome
michael@0:   PR_Free(decodedText);
michael@0: 
michael@0:   if (NS_SUCCEEDED(rv)) {
michael@0:     aResult.Append(utf8Text);
michael@0:     return NS_OK;
michael@0:   }
michael@0:   return rv;
michael@0: }
michael@0: 
michael@0: // Characters that terminate the charset token of an encoded-word as bad
michael@0: // syntax (checked in DecodeRFC2047Str together with anything <= ' ').
michael@0: static const char especials[] = "()<>@,;:\\\"/[]?.=";
michael@0: 
michael@0: // |decode_mime_part2_str| taken from comi18n.c
michael@0: // Decode RFC2047-encoded words ("=?charset?Q-or-B?text?=") in aHeader,
michael@0: // convert the result to UTF-8 and append it to aResult.
michael@0: //
michael@0: // If aOverrideCharset is true, charset in RFC2047-encoded words is
michael@0: // ignored and aDefaultCharset is assumed, instead (a word labeled UTF-8 is
michael@0: // never overridden). aDefaultCharset also replaces the label UNKNOWN-8BIT
michael@0: // and is used to convert raw octets (without RFC 2047 encoding) to UTF-8.
michael@0: //
michael@0: // Payloads of encoded-words that are separated only by whitespace and share
michael@0: // charset and encoding are collected in |encodedText| and decoded together,
michael@0: // so that a character split across words survives (bug 493544).
michael@0: // Whenever a payload cannot be decoded, its still-encoded text is appended
michael@0: // to aResult instead. Text that merely looks like the start of an
michael@0: // encoded-word is copied as is. Finally every tab in aResult is replaced by
michael@0: // a space. Always returns NS_OK.
michael@0: //static
michael@0: nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, 
michael@0:                           bool aOverrideCharset, nsACString &aResult)
michael@0: {
michael@0:   const char *p, *q = nullptr, *r;
michael@0:   const char *begin; // tracking pointer for where we are in the input buffer
michael@0:   int32_t isLastEncodedWord = 0;
michael@0:   const char *charsetStart, *charsetEnd;
michael@0:   nsAutoCString prevCharset, curCharset;
michael@0:   nsAutoCString encodedText;
michael@0:   char prevEncoding = '\0', curEncoding;
michael@0:   nsresult rv;
michael@0: 
michael@0:   begin = aHeader;
michael@0: 
michael@0:   // To avoid buffer realloc, if possible, set capacity in advance. No 
michael@0:   // matter what,  more than 3x expansion can never happen for all charsets
michael@0:   // supported by Mozilla. SCSU/BCSU with the sliding window set to a
michael@0:   // non-BMP block may be exceptions, but Mozilla does not support them. 
michael@0:   // Neither any known mail/news program use them. Even if there's, we're
michael@0:   // safe because we don't use a raw *char any more.
michael@0:   aResult.SetCapacity(3 * strlen(aHeader));
michael@0: 
michael@0:   while ((p = PL_strstr(begin, "=?")) != 0) {
michael@0:     if (isLastEncodedWord) {
michael@0:       // See if it's all whitespace.
michael@0:       for (q = begin; q < p; ++q) {
michael@0:         if (!PL_strchr(" \t\r\n", *q)) break;
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     // Whitespace between two encoded-words is dropped; anything else ends
michael@0:     // the pending run of encoded text and is copied through.
michael@0:     if (!isLastEncodedWord || q < p) {
michael@0:       if (!encodedText.IsEmpty()) {
michael@0:         rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0:                                 prevEncoding, prevCharset.get(), aResult);
michael@0:         if (NS_FAILED(rv)) {
michael@0:           aResult.Append(encodedText);
michael@0:         }
michael@0:         encodedText.Truncate();
michael@0:         prevCharset.Truncate();
michael@0:         prevEncoding = '\0';
michael@0:       }
michael@0:       // copy the part before the encoded-word
michael@0:       CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
michael@0:       begin = p;
michael@0:     }
michael@0: 
michael@0:     p += 2;
michael@0: 
michael@0:     // Get charset info
michael@0:     charsetStart = p;
michael@0:     charsetEnd = 0;
michael@0:     for (q = p; *q != '?'; q++) {
michael@0:       // the '<= ' '' test also stops at the terminating NUL
michael@0:       if (*q <= ' ' || PL_strchr(especials, *q)) {
michael@0:         goto badsyntax;
michael@0:       }
michael@0: 
michael@0:       // RFC 2231 section 5
michael@0:       if (!charsetEnd && *q == '*') {
michael@0:         charsetEnd = q; 
michael@0:       }
michael@0:     }
michael@0:     if (!charsetEnd) {
michael@0:       charsetEnd = q;
michael@0:     }
michael@0: 
michael@0:     q++;
michael@0:     curEncoding = nsCRT::ToUpper(*q);
michael@0:     if (curEncoding != 'Q' && curEncoding != 'B')
michael@0:       goto badsyntax;
michael@0: 
michael@0:     if (q[1] != '?')
michael@0:       goto badsyntax;
michael@0: 
michael@0:     // find the '?' that closes the encoded text, which starts at q + 2
michael@0:     for (r = q + 2; *r != '?'; r++) {
michael@0:       if (*r < ' ') goto badsyntax;
michael@0:     }
michael@0:     if (r[1] != '=')
michael@0:         goto badsyntax;
michael@0:     else if (r == q + 2) {
michael@0:         // it's empty, skip
michael@0:         begin = r + 2;
michael@0:         isLastEncodedWord = 1;
michael@0:         continue;
michael@0:     }
michael@0: 
michael@0:     curCharset.Assign(charsetStart, charsetEnd - charsetStart);
michael@0:     // Override charset if requested.  Never override labeled UTF-8.
michael@0:     // Use default charset instead of UNKNOWN-8BIT
michael@0:     if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8"))
michael@0:     || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))
michael@0:     ) {
michael@0:       curCharset = aDefaultCharset;
michael@0:     }
michael@0: 
michael@0:     // NOTE: R and bDecoded are declared without initializers on purpose;
michael@0:     // the gotos above jump over these declarations to |badsyntax|.
michael@0:     const char *R;
michael@0:     R = r;
michael@0:     if (curEncoding == 'B') {
michael@0:       // bug 227290. ignore an extraneous '=' at the end.
michael@0:       // (# of characters in B-encoded part has to be a multiple of 4)
michael@0:       int32_t n = r - (q + 2);
michael@0:       R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
michael@0:     }
michael@0:     // Bug 493544. Don't decode the encoded text until it ends
michael@0:     if (R[-1] != '='
michael@0:       && (prevCharset.IsEmpty()
michael@0:         || (curCharset == prevCharset && curEncoding == prevEncoding))
michael@0:     ) {
michael@0:       encodedText.Append(q + 2, R - (q + 2));
michael@0:       prevCharset = curCharset;
michael@0:       prevEncoding = curEncoding;
michael@0: 
michael@0:       begin = r + 2;
michael@0:       isLastEncodedWord = 1;
michael@0:       continue;
michael@0:     }
michael@0: 
michael@0:     bool bDecoded; // If the current line has been decoded.
michael@0:     bDecoded = false;
michael@0:     if (!encodedText.IsEmpty()) {
michael@0:       if (curCharset == prevCharset && curEncoding == prevEncoding) {
michael@0:         encodedText.Append(q + 2, R - (q + 2));
michael@0:         bDecoded = true;
michael@0:       }
michael@0:       rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0:                               prevEncoding, prevCharset.get(), aResult);
michael@0:       if (NS_FAILED(rv)) {
michael@0:         aResult.Append(encodedText);
michael@0:       }
michael@0:       encodedText.Truncate();
michael@0:       prevCharset.Truncate();
michael@0:       prevEncoding = '\0';
michael@0:     }
michael@0:     if (!bDecoded) {
michael@0:       rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding,
michael@0:                               curCharset.get(), aResult);
michael@0:       if (NS_FAILED(rv)) {
michael@0:         // Keep the undecodable payload of this word, like every other
michael@0:         // failure path does. |encodedText| is always empty at this point,
michael@0:         // so appending it would silently drop the text.
michael@0:         aResult.Append(q + 2, R - (q + 2));
michael@0:       }
michael@0:     }
michael@0: 
michael@0:     begin = r + 2;
michael@0:     isLastEncodedWord = 1;
michael@0:     continue;
michael@0: 
michael@0:   badsyntax:
michael@0:     // Not a well-formed encoded-word: flush what is pending and copy the
michael@0:     // input up to and including the "=?" marker as is.
michael@0:     if (!encodedText.IsEmpty()) {
michael@0:       rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0:                               prevEncoding, prevCharset.get(), aResult);
michael@0:       if (NS_FAILED(rv)) {
michael@0:         aResult.Append(encodedText);
michael@0:       }
michael@0:       encodedText.Truncate();
michael@0:       prevCharset.Truncate();
michael@0:     }
michael@0:     // copy the part before the encoded-word
michael@0:     aResult.Append(begin, p - begin);
michael@0:     begin = p;
michael@0:     isLastEncodedWord = 0;
michael@0:   }
michael@0: 
michael@0:   // decode whatever is still pending after the last encoded-word
michael@0:   if (!encodedText.IsEmpty()) {
michael@0:     rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0:                             prevEncoding, prevCharset.get(), aResult);
michael@0:     if (NS_FAILED(rv)) {
michael@0:       aResult.Append(encodedText);
michael@0:     }
michael@0:   }
michael@0: 
michael@0:   // put the tail back
michael@0:   CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
michael@0: 
michael@0:   nsAutoCString tempStr(aResult);
michael@0:   tempStr.ReplaceChar('\t', ' ');
michael@0:   aResult = tempStr;
michael@0: 
michael@0:   return NS_OK;
michael@0: }