netwerk/mime/nsMIMEHeaderParamImpl.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/netwerk/mime/nsMIMEHeaderParamImpl.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1345 @@
     1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set sw=4 ts=8 et tw=80 : */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#include <string.h>
    1.11 +#include "prmem.h"
    1.12 +#include "prprf.h"
    1.13 +#include "plstr.h"
    1.14 +#include "plbase64.h"
    1.15 +#include "nsCRT.h"
    1.16 +#include "nsMemory.h"
    1.17 +#include "nsTArray.h"
    1.18 +#include "nsCOMPtr.h"
    1.19 +#include "nsEscape.h"
    1.20 +#include "nsIUTF8ConverterService.h"
    1.21 +#include "nsUConvCID.h"
    1.22 +#include "nsIServiceManager.h"
    1.23 +#include "nsMIMEHeaderParamImpl.h"
    1.24 +#include "nsReadableUtils.h"
    1.25 +#include "nsNativeCharsetUtils.h"
    1.26 +#include "nsError.h"
    1.27 +#include "nsIUnicodeDecoder.h"
    1.28 +#include "mozilla/dom/EncodingUtils.h"
    1.29 +
    1.30 +using mozilla::dom::EncodingUtils;
    1.31 +
// static functions declared below are moved from mailnews/mime/src/comi18n.cpp
// These are forward declarations only, so that code earlier in the file can
// call the helpers before their definitions appear.
  
static char *DecodeQ(const char *, uint32_t);
static bool Is7bitNonAsciiString(const char *, uint32_t);
static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
static nsresult internalDecodeParameter(const nsACString&, const char*,
                                        const char*, bool, bool, nsACString&);

// XXX The chance of UTF-7 being used in the message header is really
// low, but in theory it's possible. 
//
// Evaluates to true when the charset name |cset| starts with "ISO-2022",
// "HZ-GB" or "UTF-7"; each prefix is compared case-insensitively over
// exactly the length given. |cset| is evaluated up to three times, so do not
// pass an expression with side effects.
#define IS_7BIT_NON_ASCII_CHARSET(cset)            \
    (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
     !nsCRT::strncasecmp((cset), "HZ-GB", 5)    || \
     !nsCRT::strncasecmp((cset), "UTF-7", 5))   
    1.47 +
// XPCOM boilerplate for nsMIMEHeaderParamImpl; presumably this generates the
// nsISupports methods with nsIMIMEHeaderParam as the exposed interface --
// confirm against the macro definition in nsISupportsImpl.h.
NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
    1.49 +
    1.50 +NS_IMETHODIMP 
    1.51 +nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal, 
    1.52 +                                    const char *aParamName,
    1.53 +                                    const nsACString& aFallbackCharset, 
    1.54 +                                    bool aTryLocaleCharset, 
    1.55 +                                    char **aLang, nsAString& aResult)
    1.56 +{
    1.57 +  return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
    1.58 +                        aFallbackCharset, aTryLocaleCharset, aLang, aResult);
    1.59 +}
    1.60 +
    1.61 +NS_IMETHODIMP 
    1.62 +nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal, 
    1.63 +                                        const char *aParamName,
    1.64 +                                        const nsACString& aFallbackCharset, 
    1.65 +                                        bool aTryLocaleCharset, 
    1.66 +                                        char **aLang, nsAString& aResult)
    1.67 +{
    1.68 +  return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
    1.69 +                        aFallbackCharset, aTryLocaleCharset, aLang, aResult);
    1.70 +}
    1.71 +
    1.72 +// XXX : aTryLocaleCharset is not yet effective.
    1.73 +nsresult 
    1.74 +nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal, 
    1.75 +                                      const char *aParamName,
    1.76 +                                      ParamDecoding aDecoding,
    1.77 +                                      const nsACString& aFallbackCharset, 
    1.78 +                                      bool aTryLocaleCharset, 
    1.79 +                                      char **aLang, nsAString& aResult)
    1.80 +{
    1.81 +    aResult.Truncate();
    1.82 +    nsresult rv;
    1.83 +
    1.84 +    // get parameter (decode RFC 2231/5987 when applicable, as specified by
    1.85 +    // aDecoding (5987 being a subset of 2231) and return charset.)
    1.86 +    nsXPIDLCString med;
    1.87 +    nsXPIDLCString charset;
    1.88 +    rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName, 
    1.89 +                             aDecoding, getter_Copies(charset), aLang, 
    1.90 +                             getter_Copies(med));
    1.91 +    if (NS_FAILED(rv))
    1.92 +        return rv; 
    1.93 +
    1.94 +    // convert to UTF-8 after charset conversion and RFC 2047 decoding 
    1.95 +    // if necessary.
    1.96 +    
    1.97 +    nsAutoCString str1;
    1.98 +    rv = internalDecodeParameter(med, charset.get(), nullptr, false,
    1.99 +                                 // was aDecoding == MIME_FIELD_ENCODING
   1.100 +                                 // see bug 875615
   1.101 +                                 true,
   1.102 +                                 str1);
   1.103 +    NS_ENSURE_SUCCESS(rv, rv);
   1.104 +
   1.105 +    if (!aFallbackCharset.IsEmpty())
   1.106 +    {
   1.107 +        nsAutoCString charset;
   1.108 +        EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset);
   1.109 +        nsAutoCString str2;
   1.110 +        nsCOMPtr<nsIUTF8ConverterService> 
   1.111 +          cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
   1.112 +        if (cvtUTF8 &&
   1.113 +            NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1, 
   1.114 +                PromiseFlatCString(aFallbackCharset).get(), false,
   1.115 +                                   !charset.EqualsLiteral("UTF-8"),
   1.116 +                                   1, str2))) {
   1.117 +          CopyUTF8toUTF16(str2, aResult);
   1.118 +          return NS_OK;
   1.119 +        }
   1.120 +    }
   1.121 +
   1.122 +    if (IsUTF8(str1)) {
   1.123 +      CopyUTF8toUTF16(str1, aResult);
   1.124 +      return NS_OK;
   1.125 +    }
   1.126 +
   1.127 +    if (aTryLocaleCharset && !NS_IsNativeUTF8()) 
   1.128 +      return NS_CopyNativeToUnicode(str1, aResult);
   1.129 +
   1.130 +    CopyASCIItoUTF16(str1, aResult);
   1.131 +    return NS_OK;
   1.132 +}
   1.133 +
   1.134 +// remove backslash-encoded sequences from quoted-strings
   1.135 +// modifies string in place, potentially shortening it
   1.136 +void RemoveQuotedStringEscapes(char *src)
   1.137 +{
   1.138 +  char *dst = src;
   1.139 +
   1.140 +  for (char *c = src; *c; ++c)
   1.141 +  {
   1.142 +    if (c[0] == '\\' && c[1])
   1.143 +    {
   1.144 +      // skip backslash if not at end
   1.145 +      ++c;
   1.146 +    }
   1.147 +    *dst++ = *c;
   1.148 +  }
   1.149 +  *dst = 0;
   1.150 +}
   1.151 +
   1.152 +// true is character is a hex digit
   1.153 +bool IsHexDigit(char aChar)
   1.154 +{
   1.155 +  char c = aChar;
   1.156 +
   1.157 +  return (c >= 'a' && c <= 'f') ||
   1.158 +         (c >= 'A' && c <= 'F') ||
   1.159 +         (c >= '0' && c <= '9');
   1.160 +}
   1.161 +
   1.162 +// validate that a C String containing %-escapes is syntactically valid
   1.163 +bool IsValidPercentEscaped(const char *aValue, int32_t len)
   1.164 +{
   1.165 +  for (int32_t i = 0; i < len; i++) {
   1.166 +    if (aValue[i] == '%') {
   1.167 +      if (!IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
   1.168 +        return false;
   1.169 +      }
   1.170 +    }
   1.171 +  }
   1.172 +  return true;
   1.173 +}
   1.174 +
// Support for continuations (RFC 2231, Section 3)

// only a sane number supported
// (largest segment index accepted; parseSegmentNumber and addContinuation
// both reject anything above this value)
#define MAX_CONTINUATIONS 999
   1.179 +
   1.180 +// part of a continuation
   1.181 +
   1.182 +class Continuation {
   1.183 +  public:
   1.184 +    Continuation(const char *aValue, uint32_t aLength,
   1.185 +                 bool aNeedsPercentDecoding, bool aWasQuotedString) {
   1.186 +      value = aValue;
   1.187 +      length = aLength;
   1.188 +      needsPercentDecoding = aNeedsPercentDecoding;
   1.189 +      wasQuotedString = aWasQuotedString;
   1.190 +    }
   1.191 +    Continuation() {
   1.192 +      // empty constructor needed for nsTArray
   1.193 +      value = 0L;
   1.194 +      length = 0;
   1.195 +      needsPercentDecoding = false;
   1.196 +      wasQuotedString = false;
   1.197 +    }
   1.198 +    ~Continuation() {}
   1.199 +
   1.200 +    const char *value;
   1.201 +    uint32_t length;
   1.202 +    bool needsPercentDecoding;
   1.203 +    bool wasQuotedString;
   1.204 +};
   1.205 +
   1.206 +// combine segments into a single string, returning the allocated string
   1.207 +// (or nullptr) while emptying the list 
   1.208 +char *combineContinuations(nsTArray<Continuation>& aArray)
   1.209 +{
   1.210 +  // Sanity check
   1.211 +  if (aArray.Length() == 0)
   1.212 +    return nullptr;
   1.213 +
   1.214 +  // Get an upper bound for the length
   1.215 +  uint32_t length = 0;
   1.216 +  for (uint32_t i = 0; i < aArray.Length(); i++) {
   1.217 +    length += aArray[i].length;
   1.218 +  }
   1.219 +
   1.220 +  // Allocate
   1.221 +  char *result = (char *) nsMemory::Alloc(length + 1);
   1.222 +
   1.223 +  // Concatenate
   1.224 +  if (result) {
   1.225 +    *result = '\0';
   1.226 +
   1.227 +    for (uint32_t i = 0; i < aArray.Length(); i++) {
   1.228 +      Continuation cont = aArray[i];
   1.229 +      if (! cont.value) break;
   1.230 +
   1.231 +      char *c = result + strlen(result);
   1.232 +      strncat(result, cont.value, cont.length);
   1.233 +      if (cont.needsPercentDecoding) {
   1.234 +        nsUnescape(c);
   1.235 +      }
   1.236 +      if (cont.wasQuotedString) {
   1.237 +        RemoveQuotedStringEscapes(c);
   1.238 +      }
   1.239 +    }
   1.240 +
   1.241 +    // return null if empty value
   1.242 +    if (*result == '\0') {
   1.243 +      nsMemory::Free(result);
   1.244 +      result = nullptr;
   1.245 +    }
   1.246 +  } else {
   1.247 +    // Handle OOM
   1.248 +    NS_WARNING("Out of memory\n");
   1.249 +  }
   1.250 +
   1.251 +  return result;
   1.252 +}
   1.253 +
   1.254 +// add a continuation, return false on error if segment already has been seen
   1.255 +bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
   1.256 +                     const char *aValue, uint32_t aLength,
   1.257 +                     bool aNeedsPercentDecoding, bool aWasQuotedString)
   1.258 +{
   1.259 +  if (aIndex < aArray.Length() && aArray[aIndex].value) {
   1.260 +    NS_WARNING("duplicate RC2231 continuation segment #\n");
   1.261 +    return false;
   1.262 +  }
   1.263 +
   1.264 +  if (aIndex > MAX_CONTINUATIONS) {
   1.265 +    NS_WARNING("RC2231 continuation segment # exceeds limit\n");
   1.266 +    return false;
   1.267 +  }
   1.268 +
   1.269 +  if (aNeedsPercentDecoding && aWasQuotedString) {
   1.270 +    NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
   1.271 +    return false;
   1.272 +  }
   1.273 +
   1.274 +  Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
   1.275 +
   1.276 +  if (aArray.Length() <= aIndex) {
   1.277 +    aArray.SetLength(aIndex + 1);
   1.278 +  }
   1.279 +  aArray[aIndex] = cont;
   1.280 +
   1.281 +  return true;
   1.282 +}
   1.283 +
   1.284 +// parse a segment number; return -1 on error
   1.285 +int32_t parseSegmentNumber(const char *aValue, int32_t aLen)
   1.286 +{
   1.287 +  if (aLen < 1) {
   1.288 +    NS_WARNING("segment number missing\n");
   1.289 +    return -1;
   1.290 +  }
   1.291 +
   1.292 +  if (aLen > 1 && aValue[0] == '0') {
   1.293 +    NS_WARNING("leading '0' not allowed in segment number\n");
   1.294 +    return -1;
   1.295 +  }
   1.296 +
   1.297 +  int32_t segmentNumber = 0;
   1.298 +
   1.299 +  for (int32_t i = 0; i < aLen; i++) {
   1.300 +    if (! (aValue[i] >= '0' && aValue[i] <= '9')) {
   1.301 +      NS_WARNING("invalid characters in segment number\n");
   1.302 +      return -1;
   1.303 +    }
   1.304 +
   1.305 +    segmentNumber *= 10;
   1.306 +    segmentNumber += aValue[i] - '0';
   1.307 +    if (segmentNumber > MAX_CONTINUATIONS) {
   1.308 +      NS_WARNING("Segment number exceeds sane size\n");
   1.309 +      return -1;
   1.310 +    }
   1.311 +  }
   1.312 +
   1.313 +  return segmentNumber;
   1.314 +}
   1.315 +
   1.316 +// validate a given octet sequence for compliance with the specified
   1.317 +// encoding
   1.318 +bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
   1.319 +{
   1.320 +  nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
   1.321 +    (NS_UTF8CONVERTERSERVICE_CONTRACTID));
   1.322 +  if (!cvtUTF8) {
   1.323 +    NS_WARNING("Can't get UTF8ConverterService\n");
   1.324 +    return false;
   1.325 +  }
   1.326 +
   1.327 +  nsAutoCString tmpRaw;
   1.328 +  tmpRaw.Assign(aOctets);
   1.329 +  nsAutoCString tmpDecoded;
   1.330 +
   1.331 +  nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
   1.332 +                                             PromiseFlatCString(aCharset).get(),
   1.333 +                                             false, false, 1, tmpDecoded);
   1.334 +
   1.335 +  if (rv != NS_OK) {
   1.336 +    // we can't decode; charset may be unsupported, or the octet sequence
   1.337 +    // is broken (illegal or incomplete octet sequence contained)
   1.338 +    NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
   1.339 +    return false;
   1.340 +  }
   1.341 +
   1.342 +  return true;
   1.343 +}
   1.344 +
   1.345 +// moved almost verbatim from mimehdrs.cpp
   1.346 +// char *
   1.347 +// MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
   1.348 +//                            char **charset, char **language)
   1.349 +//
   1.350 +// The format of these header lines  is
   1.351 +// <token> [ ';' <token> '=' <token-or-quoted-string> ]*
   1.352 +NS_IMETHODIMP 
   1.353 +nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue, 
   1.354 +                                            const char *aParamName,
   1.355 +                                            char **aCharset,
   1.356 +                                            char **aLang,
   1.357 +                                            char **aResult)
   1.358 +{
   1.359 +  return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
   1.360 +                             aCharset, aLang, aResult);
   1.361 +}
   1.362 +
   1.363 +
   1.364 +nsresult 
   1.365 +nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue, 
   1.366 +                                           const char *aParamName,
   1.367 +                                           ParamDecoding aDecoding,
   1.368 +                                           char **aCharset,
   1.369 +                                           char **aLang,
   1.370 +                                           char **aResult)
   1.371 +{
   1.372 +
   1.373 +  if (!aHeaderValue ||  !*aHeaderValue || !aResult)
   1.374 +    return NS_ERROR_INVALID_ARG;
   1.375 +
   1.376 +  *aResult = nullptr;
   1.377 +
   1.378 +  if (aCharset) *aCharset = nullptr;
   1.379 +  if (aLang) *aLang = nullptr;
   1.380 +
   1.381 +  nsAutoCString charset;
   1.382 +
   1.383 +  // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
   1.384 +  // them for HTTP header fields later on, see bug 776324
   1.385 +  bool acceptContinuations = true;
   1.386 +
   1.387 +  const char *str = aHeaderValue;
   1.388 +
   1.389 +  // skip leading white space.
   1.390 +  for (; *str &&  nsCRT::IsAsciiSpace(*str); ++str)
   1.391 +    ;
   1.392 +  const char *start = str;
   1.393 +  
   1.394 +  // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
   1.395 +  // For instance, return 'inline' in the following case:
   1.396 +  // Content-Disposition: inline; filename=.....
   1.397 +  if (!aParamName || !*aParamName) 
   1.398 +    {
   1.399 +      for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
   1.400 +        ;
   1.401 +      if (str == start)
   1.402 +        return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
   1.403 +
   1.404 +      *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
   1.405 +      NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
   1.406 +      (*aResult)[str - start] = '\0';  // null-terminate
   1.407 +      return NS_OK;
   1.408 +    }
   1.409 +
   1.410 +  /* Skip forward to first ';' */
   1.411 +  for (; *str && *str != ';' && *str != ','; ++str)
   1.412 +    ;
   1.413 +  if (*str)
   1.414 +    str++;
   1.415 +  /* Skip over following whitespace */
   1.416 +  for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
   1.417 +    ;
   1.418 +
   1.419 +  // Some broken http servers just specify parameters
   1.420 +  // like 'filename' without specifying disposition
   1.421 +  // method. Rewind to the first non-white-space
   1.422 +  // character.
   1.423 +  
   1.424 +  if (!*str)
   1.425 +    str = start;
   1.426 +
   1.427 +  // RFC2231 - The legitimate parm format can be:
   1.428 +  // A. title=ThisIsTitle 
   1.429 +  // B. title*=us-ascii'en-us'This%20is%20wierd.
   1.430 +  // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
   1.431 +  //    title*1*=have%20to%20support%20this.
   1.432 +  //    title*2="Else..."
   1.433 +  // D. title*0="Hey, what you think you are doing?"
   1.434 +  //    title*1="There is no charset and lang info."
   1.435 +  // RFC5987: only A and B
   1.436 +  
   1.437 +  // collect results for the different algorithms (plain filename,
   1.438 +  // RFC5987/2231-encoded filename, + continuations) separately and decide
   1.439 +  // which to use at the end
   1.440 +  char *caseAResult = nullptr;
   1.441 +  char *caseBResult = nullptr;
   1.442 +  char *caseCDResult = nullptr;
   1.443 +
   1.444 +  // collect continuation segments
   1.445 +  nsTArray<Continuation> segments;
   1.446 +
   1.447 +
   1.448 +  // our copies of the charset parameter, kept separately as they might
   1.449 +  // differ for the two formats
   1.450 +  nsDependentCSubstring charsetB, charsetCD;
   1.451 +
   1.452 +  nsDependentCSubstring lang;
   1.453 +
   1.454 +  int32_t paramLen = strlen(aParamName);
   1.455 +
   1.456 +  while (*str) {
   1.457 +    // find name/value
   1.458 +
   1.459 +    const char *nameStart = str;
   1.460 +    const char *nameEnd = nullptr;
   1.461 +    const char *valueStart = str;
   1.462 +    const char *valueEnd = nullptr;
   1.463 +    bool isQuotedString = false;
   1.464 +
   1.465 +    NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
   1.466 +
   1.467 +    // Skip forward to the end of this token. 
   1.468 +    for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
   1.469 +      ;
   1.470 +    nameEnd = str;
   1.471 +
   1.472 +    int32_t nameLen = nameEnd - nameStart;
   1.473 +
   1.474 +    // Skip over whitespace, '=', and whitespace
   1.475 +    while (nsCRT::IsAsciiSpace(*str)) ++str;
   1.476 +    if (!*str) {
   1.477 +      break;
   1.478 +    }
   1.479 +    if (*str++ != '=') {
   1.480 +      // don't accept parameters without "="
   1.481 +      goto increment_str;
   1.482 +    }
   1.483 +    while (nsCRT::IsAsciiSpace(*str)) ++str;
   1.484 +
   1.485 +    if (*str != '"') {
   1.486 +      // The value is a token, not a quoted string.
   1.487 +      valueStart = str;
   1.488 +      for (valueEnd = str;
   1.489 +           *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
   1.490 +           valueEnd++)
   1.491 +        ;
   1.492 +      str = valueEnd;
   1.493 +    } else {
   1.494 +      isQuotedString = true;
   1.495 +      
   1.496 +      ++str;
   1.497 +      valueStart = str;
   1.498 +      for (valueEnd = str; *valueEnd; ++valueEnd) {
   1.499 +        if (*valueEnd == '\\' && *(valueEnd + 1))
   1.500 +          ++valueEnd;
   1.501 +        else if (*valueEnd == '"')
   1.502 +          break;
   1.503 +      }
   1.504 +      str = valueEnd;
   1.505 +      // *valueEnd != null means that *valueEnd is quote character.
   1.506 +      if (*valueEnd)
   1.507 +        str++;
   1.508 +    }
   1.509 +
   1.510 +    // See if this is the simplest case (case A above),
   1.511 +    // a 'single' line value with no charset and lang.
   1.512 +    // If so, copy it and return.
   1.513 +    if (nameLen == paramLen &&
   1.514 +        !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
   1.515 +
   1.516 +      if (caseAResult) {
   1.517 +        // we already have one caseA result, ignore subsequent ones
   1.518 +        goto increment_str;
   1.519 +      }
   1.520 +
   1.521 +      // if the parameter spans across multiple lines we have to strip out the
   1.522 +      //     line continuation -- jht 4/29/98 
   1.523 +      nsAutoCString tempStr(valueStart, valueEnd - valueStart);
   1.524 +      tempStr.StripChars("\r\n");
   1.525 +      char *res = ToNewCString(tempStr);
   1.526 +      NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
   1.527 +      
   1.528 +      if (isQuotedString)
   1.529 +        RemoveQuotedStringEscapes(res);
   1.530 +
   1.531 +      caseAResult = res;
   1.532 +      // keep going, we may find a RFC 2231/5987 encoded alternative
   1.533 +    }
   1.534 +    // case B, C, and D
   1.535 +    else if (nameLen > paramLen &&
   1.536 +             !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
   1.537 +             *(nameStart + paramLen) == '*') {
   1.538 +
   1.539 +      // 1st char past '*'       
   1.540 +      const char *cp = nameStart + paramLen + 1; 
   1.541 +
   1.542 +      // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
   1.543 +      bool needExtDecoding = *(nameEnd - 1) == '*';      
   1.544 +
   1.545 +      bool caseB = nameLen == paramLen + 1;
   1.546 +      bool caseCStart = (*cp == '0') && needExtDecoding;
   1.547 +
   1.548 +      // parse the segment number
   1.549 +      int32_t segmentNumber = -1;
   1.550 +      if (!caseB) {
   1.551 +        int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
   1.552 +        segmentNumber = parseSegmentNumber(cp, segLen);
   1.553 +
   1.554 +        if (segmentNumber == -1) {
   1.555 +          acceptContinuations = false;
   1.556 +          goto increment_str;
   1.557 +        }
   1.558 +      }
   1.559 +
   1.560 +      // CaseB and start of CaseC: requires charset and optional language
   1.561 +      // in quotes (quotes required even if lang is blank)
   1.562 +      if (caseB || (caseCStart && acceptContinuations)) {
   1.563 +        // look for single quotation mark(')
   1.564 +        const char *sQuote1 = PL_strchr(valueStart, 0x27);
   1.565 +        const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
   1.566 +
   1.567 +        // Two single quotation marks must be present even in
   1.568 +        // absence of charset and lang. 
   1.569 +        if (!sQuote1 || !sQuote2) {
   1.570 +          NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
   1.571 +        }
   1.572 +
   1.573 +        const char *charsetStart = nullptr;
   1.574 +        int32_t charsetLength = 0;
   1.575 +        const char *langStart = nullptr;
   1.576 +        int32_t langLength = 0;
   1.577 +        const char *rawValStart = nullptr;
   1.578 +        int32_t rawValLength = 0;
   1.579 +
   1.580 +        if (sQuote2 && sQuote1) {
   1.581 +          // both delimiters present: charSet'lang'rawVal
   1.582 +          rawValStart = sQuote2 + 1;
   1.583 +          rawValLength = valueEnd - rawValStart;
   1.584 +
   1.585 +          langStart = sQuote1 + 1;
   1.586 +          langLength = sQuote2 - langStart;
   1.587 +
   1.588 +          charsetStart = valueStart;
   1.589 +          charsetLength = sQuote1 - charsetStart;
   1.590 +        }
   1.591 +        else if (sQuote1) {
   1.592 +          // one delimiter; assume charset'rawVal
   1.593 +          rawValStart = sQuote1 + 1;
   1.594 +          rawValLength = valueEnd - rawValStart;
   1.595 +
   1.596 +          charsetStart = valueStart;
   1.597 +          charsetLength = sQuote1 - valueStart;
   1.598 +        }
   1.599 +        else {
   1.600 +          // no delimiter: just rawVal
   1.601 +          rawValStart = valueStart;
   1.602 +          rawValLength = valueEnd - valueStart;
   1.603 +        }
   1.604 +
   1.605 +        if (langLength != 0) {
   1.606 +          lang.Assign(langStart, langLength);
   1.607 +        }
   1.608 +
   1.609 +        // keep the charset for later
   1.610 +        if (caseB) {
   1.611 +          charsetB.Assign(charsetStart, charsetLength);
   1.612 +        } else {
   1.613 +          // if caseCorD
   1.614 +          charsetCD.Assign(charsetStart, charsetLength);
   1.615 +        }
   1.616 +
   1.617 +        // non-empty value part
   1.618 +        if (rawValLength > 0) {
   1.619 +          if (!caseBResult && caseB) {
   1.620 +            if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
   1.621 +              goto increment_str;
   1.622 +            }
   1.623 +
   1.624 +            // allocate buffer for the raw value
   1.625 +            char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1);
   1.626 +            if (!tmpResult) {
   1.627 +              goto increment_str;
   1.628 +            }
   1.629 +            *(tmpResult + rawValLength) = 0;
   1.630 +
   1.631 +            nsUnescape(tmpResult);
   1.632 +            caseBResult = tmpResult;
   1.633 +          } else {
   1.634 +            // caseC
   1.635 +            bool added = addContinuation(segments, 0, rawValStart,
   1.636 +                                         rawValLength, needExtDecoding,
   1.637 +                                         isQuotedString);
   1.638 +
   1.639 +            if (!added) {
   1.640 +              // continuation not added, stop processing them
   1.641 +              acceptContinuations = false;
   1.642 +            }
   1.643 +          }
   1.644 +        }
   1.645 +      }  // end of if-block :  title*0*=  or  title*= 
   1.646 +      // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
   1.647 +      // or 2nd or later lines of a caseC param : title*[1-9]*= 
   1.648 +      else if (acceptContinuations && segmentNumber != -1) {
   1.649 +        uint32_t valueLength = valueEnd - valueStart;
   1.650 +
   1.651 +        bool added = addContinuation(segments, segmentNumber, valueStart,
   1.652 +                                     valueLength, needExtDecoding,
   1.653 +                                     isQuotedString);
   1.654 +
   1.655 +        if (!added) {
   1.656 +          // continuation not added, stop processing them
   1.657 +          acceptContinuations = false;
   1.658 +        }
   1.659 +      } // end of if-block :  title*[0-9]= or title*[1-9]*=
   1.660 +    }
   1.661 +
   1.662 +    // str now points after the end of the value.
   1.663 +    //   skip over whitespace, ';', whitespace.
   1.664 +increment_str:      
   1.665 +    while (nsCRT::IsAsciiSpace(*str)) ++str;
   1.666 +    if (*str == ';') {
   1.667 +      ++str;
   1.668 +    } else {
   1.669 +      // stop processing the header field; either we are done or the
   1.670 +      // separator was missing
   1.671 +      break;
   1.672 +    }
   1.673 +    while (nsCRT::IsAsciiSpace(*str)) ++str;
   1.674 +  }
   1.675 +
   1.676 +  caseCDResult = combineContinuations(segments);
   1.677 +
   1.678 +  if (caseBResult && !charsetB.IsEmpty()) {
   1.679 +    // check that the 2231/5987 result decodes properly given the
   1.680 +    // specified character set
   1.681 +    if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
   1.682 +      caseBResult = nullptr;
   1.683 +  }
   1.684 +
   1.685 +  if (caseCDResult && !charsetCD.IsEmpty()) {
   1.686 +    // check that the 2231/5987 result decodes properly given the
   1.687 +    // specified character set
   1.688 +    if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
   1.689 +      caseCDResult = nullptr;
   1.690 +  }
   1.691 +
   1.692 +  if (caseBResult) {
   1.693 +    // prefer simple 5987 format over 2231 with continuations
   1.694 +    *aResult = caseBResult;
   1.695 +    caseBResult = nullptr;
   1.696 +    charset.Assign(charsetB);
   1.697 +  }
   1.698 +  else if (caseCDResult) {
   1.699 +    // prefer 2231/5987 with or without continuations over plain format
   1.700 +    *aResult = caseCDResult;
   1.701 +    caseCDResult = nullptr;
   1.702 +    charset.Assign(charsetCD);
   1.703 +  }
   1.704 +  else if (caseAResult) {
   1.705 +    *aResult = caseAResult;
   1.706 +    caseAResult = nullptr;
   1.707 +  }
   1.708 +
   1.709 +  // free unused stuff
   1.710 +  nsMemory::Free(caseAResult);
   1.711 +  nsMemory::Free(caseBResult);
   1.712 +  nsMemory::Free(caseCDResult);
   1.713 +
   1.714 +  // if we have a result
   1.715 +  if (*aResult) {
   1.716 +    // then return charset and lang as well
   1.717 +    if (aLang && !lang.IsEmpty()) {
   1.718 +      uint32_t len = lang.Length();
   1.719 +      *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1);
   1.720 +      if (*aLang) {
   1.721 +        *(*aLang + len) = 0;
   1.722 +      }
   1.723 +   }
   1.724 +    if (aCharset && !charset.IsEmpty()) {
   1.725 +      uint32_t len = charset.Length();
   1.726 +      *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1);
   1.727 +      if (*aCharset) {
   1.728 +        *(*aCharset + len) = 0;
   1.729 +      }
   1.730 +    }
   1.731 +  }
   1.732 +
   1.733 +  return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
   1.734 +}
   1.735 +
   1.736 +nsresult
   1.737 +internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
   1.738 +                            bool aOverrideCharset, bool aEatContinuations,
   1.739 +                            nsACString& aResult)
   1.740 +{
   1.741 +  aResult.Truncate();
   1.742 +  if (!aHeaderVal)
   1.743 +    return NS_ERROR_INVALID_ARG;
   1.744 +  if (!*aHeaderVal)
   1.745 +    return NS_OK;
   1.746 +
   1.747 +
   1.748 +  // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string  but
   1.749 +  // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
   1.750 +  // to UTF-8. Otherwise, just strips away CRLF. 
   1.751 +  if (PL_strstr(aHeaderVal, "=?") || 
   1.752 +      (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) || 
   1.753 +      Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
   1.754 +    DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
   1.755 +  } else if (aEatContinuations && 
   1.756 +             (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
   1.757 +    aResult = aHeaderVal;
   1.758 +  } else {
   1.759 +    aEatContinuations = false;
   1.760 +    aResult = aHeaderVal;
   1.761 +  }
   1.762 +
   1.763 +  if (aEatContinuations) {
   1.764 +    nsAutoCString temp(aResult);
   1.765 +    temp.ReplaceSubstring("\n\t", " ");
   1.766 +    temp.ReplaceSubstring("\r\t", " ");
   1.767 +    temp.StripChars("\r\n");
   1.768 +    aResult = temp;
   1.769 +  }
   1.770 +
   1.771 +  return NS_OK;
   1.772 +}
   1.773 +
   1.774 +NS_IMETHODIMP
   1.775 +nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal, 
   1.776 +                                           const char* aDefaultCharset, 
   1.777 +                                           bool aOverrideCharset, 
   1.778 +                                           bool aEatContinuations,
   1.779 +                                           nsACString& aResult)
   1.780 +{
   1.781 +  return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset,
   1.782 +                                     aOverrideCharset, aEatContinuations,
   1.783 +                                     aResult);
   1.784 +}
   1.785 +
   1.786 +// true if the character is allowed in a RFC 5987 value
   1.787 +// see RFC 5987, Section 3.2.1, "attr-char"
   1.788 +bool IsRFC5987AttrChar(char aChar)
   1.789 +{
   1.790 +  char c = aChar;
   1.791 +
   1.792 +  return (c >= 'a' && c <= 'z') ||
   1.793 +         (c >= 'A' && c <= 'Z') ||
   1.794 +         (c >= '0' && c <= '9') ||
   1.795 +         (c == '!' || c == '#' || c == '$' || c == '&' ||
   1.796 +          c == '+' || c == '-' || c == '.' || c == '^' ||
   1.797 +          c == '_' || c == '`' || c == '|' || c == '~');
   1.798 +}
   1.799 +
   1.800 +// percent-decode a value
   1.801 +// returns false on failure
   1.802 +bool PercentDecode(nsACString& aValue)
   1.803 +{
   1.804 +  char *c = (char *) nsMemory::Alloc(aValue.Length() + 1);
   1.805 +  if (!c) {
   1.806 +    return false;
   1.807 +  }
   1.808 +
   1.809 +  strcpy(c, PromiseFlatCString(aValue).get());
   1.810 +  nsUnescape(c);
   1.811 +  aValue.Assign(c);
   1.812 +  nsMemory::Free(c);
   1.813 +
   1.814 +  return true;
   1.815 +}
   1.816 +
   1.817 +// Decode a parameter value using the encoding defined in RFC 5987
   1.818 +// 
   1.819 +// charset  "'" [ language ] "'" value-chars
   1.820 +NS_IMETHODIMP 
   1.821 +nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
   1.822 +                                          nsACString& aLang,
   1.823 +                                          nsAString& aResult)
   1.824 +{
   1.825 +  nsAutoCString charset;
   1.826 +  nsAutoCString language;
   1.827 +  nsAutoCString value;
   1.828 +
   1.829 +  uint32_t delimiters = 0;
   1.830 +  const char *encoded = PromiseFlatCString(aParamVal).get();
   1.831 +  const char *c = encoded;
   1.832 +
   1.833 +  while (*c) {
   1.834 +    char tc = *c++;
   1.835 +
   1.836 +    if (tc == '\'') {
   1.837 +      // single quote
   1.838 +      delimiters++;
   1.839 +    } else if (((unsigned char)tc) >= 128) {
   1.840 +      // fail early, not ASCII
   1.841 +      NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
   1.842 +      return NS_ERROR_INVALID_ARG;
   1.843 +    } else {
   1.844 +      if (delimiters == 0) {
   1.845 +        // valid characters are checked later implicitly
   1.846 +        charset.Append(tc);
   1.847 +      } else if (delimiters == 1) {
   1.848 +        // no value checking for now
   1.849 +        language.Append(tc);
   1.850 +      } else if (delimiters == 2) {
   1.851 +        if (IsRFC5987AttrChar(tc)) {
   1.852 +          value.Append(tc);
   1.853 +        } else if (tc == '%') {
   1.854 +          if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
   1.855 +            // we expect two more characters
   1.856 +            NS_WARNING("broken %-escape in RFC5987-encoded param");
   1.857 +            return NS_ERROR_INVALID_ARG;
   1.858 +          }
   1.859 +          value.Append(tc);
   1.860 +          // we consume two more
   1.861 +          value.Append(*c++);
   1.862 +          value.Append(*c++);
   1.863 +        } else {
   1.864 +          // character not allowed here
   1.865 +          NS_WARNING("invalid character in RFC5987-encoded param");
   1.866 +          return NS_ERROR_INVALID_ARG;
   1.867 +        }      
   1.868 +      }
   1.869 +    }
   1.870 +  }
   1.871 +
   1.872 +  if (delimiters != 2) {
   1.873 +    NS_WARNING("missing delimiters in RFC5987-encoded param");
   1.874 +    return NS_ERROR_INVALID_ARG;
   1.875 +  }
   1.876 +
   1.877 +  // abort early for unsupported encodings
   1.878 +  if (!charset.LowerCaseEqualsLiteral("utf-8")) {
   1.879 +    NS_WARNING("unsupported charset in RFC5987-encoded param");
   1.880 +    return NS_ERROR_INVALID_ARG;
   1.881 +  }
   1.882 +
   1.883 +  // percent-decode
   1.884 +  if (!PercentDecode(value)) {
   1.885 +    return NS_ERROR_OUT_OF_MEMORY;
   1.886 +  }
   1.887 +
   1.888 +  // return the encoding
   1.889 +  aLang.Assign(language);
   1.890 +
   1.891 +  // finally convert octet sequence to UTF-8 and be done
   1.892 +  nsresult rv = NS_OK;
   1.893 +  nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 =
   1.894 +    do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv);
   1.895 +  NS_ENSURE_SUCCESS(rv, rv);
   1.896 +
   1.897 +  nsAutoCString utf8;
   1.898 +  rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8);
   1.899 +  NS_ENSURE_SUCCESS(rv, rv);
   1.900 +
   1.901 +  CopyUTF8toUTF16(utf8, aResult);
   1.902 +  return NS_OK;
   1.903 +}
   1.904 +
   1.905 +nsresult 
   1.906 +internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
   1.907 +                        const char* aDefaultCharset, bool aOverrideCharset,
   1.908 +                        bool aDecode2047, nsACString& aResult)
   1.909 +{
   1.910 +  aResult.Truncate();
   1.911 +  // If aCharset is given, aParamValue was obtained from RFC2231/5987 
   1.912 +  // encoding and we're pretty sure that it's in aCharset.
   1.913 +  if (aCharset && *aCharset)
   1.914 +  {
   1.915 +    nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
   1.916 +    if (cvtUTF8)
   1.917 +      return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
   1.918 +          true, true, 1, aResult);
   1.919 +  }
   1.920 +
   1.921 +  const nsAFlatCString& param = PromiseFlatCString(aParamValue);
   1.922 +  nsAutoCString unQuoted;
   1.923 +  nsACString::const_iterator s, e;
   1.924 +  param.BeginReading(s);
   1.925 +  param.EndReading(e);
   1.926 +
   1.927 +  // strip '\' when used to quote CR, LF, '"' and '\'
   1.928 +  for ( ; s != e; ++s) {
   1.929 +    if ((*s == '\\')) {
   1.930 +      if (++s == e) {
   1.931 +        --s; // '\' is at the end. move back and append '\'.
   1.932 +      }
   1.933 +      else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
   1.934 +        --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
   1.935 +      }
   1.936 +      // else : skip '\' and append the quoted character.
   1.937 +    }
   1.938 +    unQuoted.Append(*s);
   1.939 +  }
   1.940 +
   1.941 +  aResult = unQuoted;
   1.942 +  nsresult rv = NS_OK;
   1.943 +  
   1.944 +  if (aDecode2047) {
   1.945 +    nsAutoCString decoded;
   1.946 +
   1.947 +    // Try RFC 2047 encoding, instead.
   1.948 +    rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
   1.949 +                                     aOverrideCharset, true, decoded);
   1.950 +
   1.951 +    if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
   1.952 +      aResult = decoded;
   1.953 +  }
   1.954 +    
   1.955 +  return rv;
   1.956 +}
   1.957 +
   1.958 +NS_IMETHODIMP
   1.959 +nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
   1.960 +                                       const char* aCharset,
   1.961 +                                       const char* aDefaultCharset,
   1.962 +                                       bool aOverrideCharset,
   1.963 +                                       nsACString& aResult)
   1.964 +{
   1.965 +  return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
   1.966 +                                 aOverrideCharset, true, aResult);
   1.967 +}
   1.968 +
   1.969 +#define ISHEXCHAR(c) \
   1.970 +        ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39)  ||  \
   1.971 +         (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46)  ||  \
   1.972 +         (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
   1.973 +
   1.974 +// Decode Q encoding (RFC 2047).
   1.975 +// static
   1.976 +char *DecodeQ(const char *in, uint32_t length)
   1.977 +{
   1.978 +  char *out, *dest = 0;
   1.979 +
   1.980 +  out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
   1.981 +  if (dest == nullptr)
   1.982 +    return nullptr;
   1.983 +  while (length > 0) {
   1.984 +    unsigned c = 0;
   1.985 +    switch (*in) {
   1.986 +    case '=':
   1.987 +      // check if |in| in the form of '=hh'  where h is [0-9a-fA-F].
   1.988 +      if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
   1.989 +        goto badsyntax;
   1.990 +      PR_sscanf(in + 1, "%2X", &c);
   1.991 +      *out++ = (char) c;
   1.992 +      in += 3;
   1.993 +      length -= 3;
   1.994 +      break;
   1.995 +
   1.996 +    case '_':
   1.997 +      *out++ = ' ';
   1.998 +      in++;
   1.999 +      length--;
  1.1000 +      break;
  1.1001 +
  1.1002 +    default:
  1.1003 +      if (*in & 0x80) goto badsyntax;
  1.1004 +      *out++ = *in++;
  1.1005 +      length--;
  1.1006 +    }
  1.1007 +  }
  1.1008 +  *out++ = '\0';
  1.1009 +
  1.1010 +  for (out = dest; *out ; ++out) {
  1.1011 +    if (*out == '\t')
  1.1012 +      *out = ' ';
  1.1013 +  }
  1.1014 +
  1.1015 +  return dest;
  1.1016 +
  1.1017 + badsyntax:
  1.1018 +  PR_Free(dest);
  1.1019 +  return nullptr;
  1.1020 +}
  1.1021 +
  1.1022 +// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842)) 
  1.1023 +// or has  ESC which may be an  indication that  it's in one of many ISO 
  1.1024 +// 2022 7bit  encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
  1.1025 +// static
  1.1026 +bool Is7bitNonAsciiString(const char *input, uint32_t len)
  1.1027 +{
  1.1028 +  int32_t c;
  1.1029 +
  1.1030 +  enum { hz_initial, // No HZ seen yet
  1.1031 +         hz_escaped, // Inside an HZ ~{ escape sequence 
  1.1032 +         hz_seen, // Have seen at least one complete HZ sequence 
  1.1033 +         hz_notpresent // Have seen something that is not legal HZ
  1.1034 +  } hz_state;
  1.1035 +
  1.1036 +  hz_state = hz_initial;
  1.1037 +  while (len) {
  1.1038 +    c = uint8_t(*input++);
  1.1039 +    len--;
  1.1040 +    if (c & 0x80) return false;
  1.1041 +    if (c == 0x1B) return true;
  1.1042 +    if (c == '~') {
  1.1043 +      switch (hz_state) {
  1.1044 +      case hz_initial:
  1.1045 +      case hz_seen:
  1.1046 +        if (*input == '{') {
  1.1047 +          hz_state = hz_escaped;
  1.1048 +        } else if (*input == '~') {
  1.1049 +          // ~~ is the HZ encoding of ~.  Skip over second ~ as well
  1.1050 +          hz_state = hz_seen;
  1.1051 +          input++;
  1.1052 +          len--;
  1.1053 +        } else {
  1.1054 +          hz_state = hz_notpresent;
  1.1055 +        }
  1.1056 +        break;
  1.1057 +
  1.1058 +      case hz_escaped:
  1.1059 +        if (*input == '}') hz_state = hz_seen;
  1.1060 +        break;
  1.1061 +      default:
  1.1062 +        break;
  1.1063 +      }
  1.1064 +    }
  1.1065 +  }
  1.1066 +  return hz_state == hz_seen;
  1.1067 +}
  1.1068 +
  1.1069 +#define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
  1.1070 +
  1.1071 +// copy 'raw' sequences of octets in aInput to aOutput.
  1.1072 +// If aDefaultCharset is specified, the input is assumed to be in the
  1.1073 +// charset and converted to UTF-8. Otherwise, a blind copy is made.
  1.1074 +// If aDefaultCharset is specified, but the conversion to UTF-8
  1.1075 +// is not successful, each octet is replaced by Unicode replacement
  1.1076 +// chars. *aOutput is advanced by the number of output octets.
  1.1077 +// static
  1.1078 +void CopyRawHeader(const char *aInput, uint32_t aLen, 
  1.1079 +                   const char *aDefaultCharset, nsACString &aOutput)
  1.1080 +{
  1.1081 +  int32_t c;
  1.1082 +
  1.1083 +  // If aDefaultCharset is not specified, make a blind copy.
  1.1084 +  if (!aDefaultCharset || !*aDefaultCharset) {
  1.1085 +    aOutput.Append(aInput, aLen);
  1.1086 +    return;
  1.1087 +  }
  1.1088 +
  1.1089 +  // Copy as long as it's US-ASCII.  An ESC may indicate ISO 2022
  1.1090 +  // A ~ may indicate it is HZ
  1.1091 +  while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
  1.1092 +    aOutput.Append(char(c));
  1.1093 +    aLen--;
  1.1094 +  }
  1.1095 +  if (!aLen) {
  1.1096 +    return;
  1.1097 +  }
  1.1098 +  aInput--;
  1.1099 +
  1.1100 +  // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
  1.1101 +  // string and aDefaultCharset is a 7bit non-ascii charset.
  1.1102 +  bool skipCheck = (c == 0x1B || c == '~') && 
  1.1103 +                     IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
  1.1104 +
  1.1105 +  // If not UTF-8, treat as default charset
  1.1106 +  nsCOMPtr<nsIUTF8ConverterService> 
  1.1107 +    cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
  1.1108 +  nsAutoCString utf8Text;
  1.1109 +  if (cvtUTF8 &&
  1.1110 +      NS_SUCCEEDED(
  1.1111 +      cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen), 
  1.1112 +                                   aDefaultCharset, skipCheck, true, 1,
  1.1113 +                                   utf8Text))) {
  1.1114 +    aOutput.Append(utf8Text);
  1.1115 +  } else { // replace each octet with Unicode replacement char in UTF-8.
  1.1116 +    for (uint32_t i = 0; i < aLen; i++) {
  1.1117 +      c = uint8_t(*aInput++);
  1.1118 +      if (c & 0x80)
  1.1119 +        aOutput.Append(REPLACEMENT_CHAR);
  1.1120 +      else
  1.1121 +        aOutput.Append(char(c));
  1.1122 +    }
  1.1123 +  }
  1.1124 +}
  1.1125 +
  1.1126 +nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64,
  1.1127 +                            const char *aCharset, nsACString &aResult)
  1.1128 +{
  1.1129 +  char *decodedText;
  1.1130 +  NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
  1.1131 +  if(aQOrBase64 == 'Q')
  1.1132 +    decodedText = DecodeQ(aEncoded, aLen);
  1.1133 +  else if (aQOrBase64 == 'B') {
  1.1134 +    decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
  1.1135 +  } else {
  1.1136 +    return NS_ERROR_INVALID_ARG;
  1.1137 +  }
  1.1138 +
  1.1139 +  if (!decodedText) {
  1.1140 +    return NS_ERROR_INVALID_ARG;
  1.1141 +  }
  1.1142 +
  1.1143 +  nsresult rv;
  1.1144 +  nsCOMPtr<nsIUTF8ConverterService>
  1.1145 +    cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
  1.1146 +  nsAutoCString utf8Text;
  1.1147 +  if (NS_SUCCEEDED(rv)) {
  1.1148 +    // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
  1.1149 +    rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
  1.1150 +                                      aCharset,
  1.1151 +                                      IS_7BIT_NON_ASCII_CHARSET(aCharset),
  1.1152 +                                      true, 1, utf8Text);
  1.1153 +  }
  1.1154 +  PR_Free(decodedText);
  1.1155 +  if (NS_FAILED(rv)) {
  1.1156 +    return rv;
  1.1157 +  }
  1.1158 +  aResult.Append(utf8Text);
  1.1159 +
  1.1160 +  return NS_OK;
  1.1161 +}
  1.1162 +
// Characters that end the charset scan of an encoded-word with a syntax
// error (checked together with anything <= ' ' in DecodeRFC2047Str).
static const char especials[] = "()<>@,;:\\\"/[]?.=";

// |decode_mime_part2_str| taken from comi18n.c
// Decode RFC2047-encoded words in the input and convert the result to UTF-8.
// If aOverrideCharset is true, charset in RFC2047-encoded words is 
// ignored and aDefaultCharset is assumed, instead. aDefaultCharset
// is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
//
// aHeader           NUL-terminated input.
// aDefaultCharset   see above; may be null.
// aOverrideCharset  see above; a word labeled UTF-8 is never overridden.
// aResult           output is appended here; TABs in the final string are
//                   replaced by spaces.
//
// Always returns NS_OK. When an encoded-word (or a run of them) cannot be
// decoded, the accumulated encoded payload is appended undecoded instead.
//static
nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset, 
                          bool aOverrideCharset, nsACString &aResult)
{
  // p: start of the current "=?" candidate; q: scan pointer through the
  // charset and encoding fields; r: scan pointer to the '?' that closes the
  // encoded text.
  const char *p, *q = nullptr, *r;
  const char *begin; // tracking pointer for where we are in the input buffer
  int32_t isLastEncodedWord = 0;
  const char *charsetStart, *charsetEnd;
  nsAutoCString prevCharset, curCharset;
  // Payload of adjacent encoded-words with the same charset and encoding,
  // collected so that they can be decoded together (see Bug 493544 below).
  nsAutoCString encodedText;
  char prevEncoding = '\0', curEncoding;
  nsresult rv;

  begin = aHeader;

  // To avoid buffer realloc, if possible, set capacity in advance. No 
  // matter what,  more than 3x expansion can never happen for all charsets
  // supported by Mozilla. SCSU/BCSU with the sliding window set to a
  // non-BMP block may be exceptions, but Mozilla does not support them. 
  // Neither any known mail/news program use them. Even if there's, we're
  // safe because we don't use a raw *char any more.
  aResult.SetCapacity(3 * strlen(aHeader));

  while ((p = PL_strstr(begin, "=?")) != 0) {
    if (isLastEncodedWord) {
      // See if it's all whitespace.
      for (q = begin; q < p; ++q) {
        if (!PL_strchr(" \t\r\n", *q)) break;
      }
    }

    // Text between the previous position and this "=?" is kept unless it is
    // pure whitespace separating two encoded-words.
    if (!isLastEncodedWord || q < p) {
      // Non-whitespace text interrupts the run: flush the pending payload.
      if (!encodedText.IsEmpty()) {
        rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                                prevEncoding, prevCharset.get(), aResult);
        if (NS_FAILED(rv)) {
          aResult.Append(encodedText);
        }
        encodedText.Truncate();
        prevCharset.Truncate();
        prevEncoding = '\0';
      }
      // copy the part before the encoded-word
      CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
      begin = p;
    }

    p += 2;

    // Get charset info
    charsetStart = p;
    charsetEnd = 0;
    // The "*q <= ' '" test also ends the scan at the terminating NUL.
    for (q = p; *q != '?'; q++) {
      if (*q <= ' ' || PL_strchr(especials, *q)) {
        goto badsyntax;
      }

      // RFC 2231 section 5
      // (the charset name ends at the first '*'; what follows it up to the
      // '?' is not used)
      if (!charsetEnd && *q == '*') {
        charsetEnd = q; 
      }
    }
    if (!charsetEnd) {
      charsetEnd = q;
    }

    // q is on the '?' after the charset; the next character is the encoding.
    q++;
    curEncoding = nsCRT::ToUpper(*q);
    if (curEncoding != 'Q' && curEncoding != 'B')
      goto badsyntax;

    if (q[1] != '?')
      goto badsyntax;

    // Find the '?' that closes the encoded text, which starts at q + 2.
    // The "*r < ' '" test also ends the scan at the terminating NUL.
    r = q;
    for (r = q + 2; *r != '?'; r++) {
      if (*r < ' ') goto badsyntax;
    }
    if (r[1] != '=')
        goto badsyntax;
    else if (r == q + 2) {
        // it's empty, skip
        begin = r + 2;
        isLastEncodedWord = 1;
        continue;
    }

    curCharset.Assign(charsetStart, charsetEnd - charsetStart);
    // Override charset if requested.  Never override labeled UTF-8.
    // Use default charset instead of UNKNOWN-8BIT
    if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8"))
    || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))
    ) {
      curCharset = aDefaultCharset;
    }

    // R is the effective end of the encoded text (r, or r - 1 when one
    // surplus '=' is dropped below).
    const char *R;
    R = r;
    if (curEncoding == 'B') {
      // bug 227290. ignore an extraneous '=' at the end.
      // (# of characters in B-encoded part has to be a multiple of 4)
      int32_t n = r - (q + 2);
      R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
    }
    // Bug 493544. Don't decode the encoded text until it ends
    // (a payload that does not end in '=' is only accumulated here, as long
    // as it matches the charset and encoding of what is already pending)
    if (R[-1] != '='
      && (prevCharset.IsEmpty()
        || (curCharset == prevCharset && curEncoding == prevEncoding))
    ) {
      encodedText.Append(q + 2, R - (q + 2));
      prevCharset = curCharset;
      prevEncoding = curEncoding;

      begin = r + 2;
      isLastEncodedWord = 1;
      continue;
    }

    bool bDecoded; // If the current line has been decoded.
    bDecoded = false;
    if (!encodedText.IsEmpty()) {
      // Join the current word onto the pending run when compatible, then
      // decode the run.
      if (curCharset == prevCharset && curEncoding == prevEncoding) {
        encodedText.Append(q + 2, R - (q + 2));
        bDecoded = true;
      }
      rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                              prevEncoding, prevCharset.get(), aResult);
      if (NS_FAILED(rv)) {
        aResult.Append(encodedText);
      }
      encodedText.Truncate();
      prevCharset.Truncate();
      prevEncoding = '\0';
    }
    if (!bDecoded) {
      // The current word was not part of the run: decode it on its own.
      rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding,
                              curCharset.get(), aResult);
      if (NS_FAILED(rv)) {
        // NOTE(review): encodedText is always empty at this point (it was
        // either empty already or truncated just above), so this Append adds
        // nothing and a word that fails to decode here is dropped. The other
        // failure paths append the undecoded payload -- confirm whether that
        // was intended here too.
        aResult.Append(encodedText);
      }
    }

    begin = r + 2;
    isLastEncodedWord = 1;
    continue;

  badsyntax:
    // Not a well-formed encoded-word: flush any pending run, then copy the
    // text up to and including the "=?" (p was advanced past it above)
    // verbatim and resume scanning from there.
    if (!encodedText.IsEmpty()) {
      rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                              prevEncoding, prevCharset.get(), aResult);
      if (NS_FAILED(rv)) {
        aResult.Append(encodedText);
      }
      encodedText.Truncate();
      prevCharset.Truncate();
    }
    // copy the part before the encoded-word
    aResult.Append(begin, p - begin);
    begin = p;
    isLastEncodedWord = 0;
  }

  // Flush a run that was still pending when the input ended.
  if (!encodedText.IsEmpty()) {
    rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
                            prevEncoding, prevCharset.get(), aResult);
    if (NS_FAILED(rv)) {
      aResult.Append(encodedText);
    }
  }

  // put the tail back
  CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);

  // Replace TABs by spaces via a temporary string.
  nsAutoCString tempStr(aResult);
  tempStr.ReplaceChar('\t', ' ');
  aResult = tempStr;

  return NS_OK;
}

mercurial