netwerk/mime/nsMIMEHeaderParamImpl.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set sw=4 ts=8 et tw=80 : */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #include <string.h>
michael@0 8 #include "prmem.h"
michael@0 9 #include "prprf.h"
michael@0 10 #include "plstr.h"
michael@0 11 #include "plbase64.h"
michael@0 12 #include "nsCRT.h"
michael@0 13 #include "nsMemory.h"
michael@0 14 #include "nsTArray.h"
michael@0 15 #include "nsCOMPtr.h"
michael@0 16 #include "nsEscape.h"
michael@0 17 #include "nsIUTF8ConverterService.h"
michael@0 18 #include "nsUConvCID.h"
michael@0 19 #include "nsIServiceManager.h"
michael@0 20 #include "nsMIMEHeaderParamImpl.h"
michael@0 21 #include "nsReadableUtils.h"
michael@0 22 #include "nsNativeCharsetUtils.h"
michael@0 23 #include "nsError.h"
michael@0 24 #include "nsIUnicodeDecoder.h"
michael@0 25 #include "mozilla/dom/EncodingUtils.h"
michael@0 26
michael@0 27 using mozilla::dom::EncodingUtils;
michael@0 28
michael@0 29 // static functions declared below are moved from mailnews/mime/src/comi18n.cpp
michael@0 30
michael@0 31 static char *DecodeQ(const char *, uint32_t);
michael@0 32 static bool Is7bitNonAsciiString(const char *, uint32_t);
michael@0 33 static void CopyRawHeader(const char *, uint32_t, const char *, nsACString &);
michael@0 34 static nsresult DecodeRFC2047Str(const char *, const char *, bool, nsACString&);
michael@0 35 static nsresult internalDecodeParameter(const nsACString&, const char*,
michael@0 36 const char*, bool, bool, nsACString&);
michael@0 37
michael@0 38 // XXX The chance of UTF-7 being used in the message header is really
michael@0 39 // low, but in theory it's possible.
michael@0 40 #define IS_7BIT_NON_ASCII_CHARSET(cset) \
michael@0 41 (!nsCRT::strncasecmp((cset), "ISO-2022", 8) || \
michael@0 42 !nsCRT::strncasecmp((cset), "HZ-GB", 5) || \
michael@0 43 !nsCRT::strncasecmp((cset), "UTF-7", 5))
michael@0 44
michael@0 45 NS_IMPL_ISUPPORTS(nsMIMEHeaderParamImpl, nsIMIMEHeaderParam)
michael@0 46
michael@0 47 NS_IMETHODIMP
michael@0 48 nsMIMEHeaderParamImpl::GetParameter(const nsACString& aHeaderVal,
michael@0 49 const char *aParamName,
michael@0 50 const nsACString& aFallbackCharset,
michael@0 51 bool aTryLocaleCharset,
michael@0 52 char **aLang, nsAString& aResult)
michael@0 53 {
michael@0 54 return DoGetParameter(aHeaderVal, aParamName, MIME_FIELD_ENCODING,
michael@0 55 aFallbackCharset, aTryLocaleCharset, aLang, aResult);
michael@0 56 }
michael@0 57
michael@0 58 NS_IMETHODIMP
michael@0 59 nsMIMEHeaderParamImpl::GetParameterHTTP(const nsACString& aHeaderVal,
michael@0 60 const char *aParamName,
michael@0 61 const nsACString& aFallbackCharset,
michael@0 62 bool aTryLocaleCharset,
michael@0 63 char **aLang, nsAString& aResult)
michael@0 64 {
michael@0 65 return DoGetParameter(aHeaderVal, aParamName, HTTP_FIELD_ENCODING,
michael@0 66 aFallbackCharset, aTryLocaleCharset, aLang, aResult);
michael@0 67 }
michael@0 68
michael@0 69 // XXX : aTryLocaleCharset is not yet effective.
michael@0 70 nsresult
michael@0 71 nsMIMEHeaderParamImpl::DoGetParameter(const nsACString& aHeaderVal,
michael@0 72 const char *aParamName,
michael@0 73 ParamDecoding aDecoding,
michael@0 74 const nsACString& aFallbackCharset,
michael@0 75 bool aTryLocaleCharset,
michael@0 76 char **aLang, nsAString& aResult)
michael@0 77 {
michael@0 78 aResult.Truncate();
michael@0 79 nsresult rv;
michael@0 80
michael@0 81 // get parameter (decode RFC 2231/5987 when applicable, as specified by
michael@0 82 // aDecoding (5987 being a subset of 2231) and return charset.)
michael@0 83 nsXPIDLCString med;
michael@0 84 nsXPIDLCString charset;
michael@0 85 rv = DoParameterInternal(PromiseFlatCString(aHeaderVal).get(), aParamName,
michael@0 86 aDecoding, getter_Copies(charset), aLang,
michael@0 87 getter_Copies(med));
michael@0 88 if (NS_FAILED(rv))
michael@0 89 return rv;
michael@0 90
michael@0 91 // convert to UTF-8 after charset conversion and RFC 2047 decoding
michael@0 92 // if necessary.
michael@0 93
michael@0 94 nsAutoCString str1;
michael@0 95 rv = internalDecodeParameter(med, charset.get(), nullptr, false,
michael@0 96 // was aDecoding == MIME_FIELD_ENCODING
michael@0 97 // see bug 875615
michael@0 98 true,
michael@0 99 str1);
michael@0 100 NS_ENSURE_SUCCESS(rv, rv);
michael@0 101
michael@0 102 if (!aFallbackCharset.IsEmpty())
michael@0 103 {
michael@0 104 nsAutoCString charset;
michael@0 105 EncodingUtils::FindEncodingForLabel(aFallbackCharset, charset);
michael@0 106 nsAutoCString str2;
michael@0 107 nsCOMPtr<nsIUTF8ConverterService>
michael@0 108 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0 109 if (cvtUTF8 &&
michael@0 110 NS_SUCCEEDED(cvtUTF8->ConvertStringToUTF8(str1,
michael@0 111 PromiseFlatCString(aFallbackCharset).get(), false,
michael@0 112 !charset.EqualsLiteral("UTF-8"),
michael@0 113 1, str2))) {
michael@0 114 CopyUTF8toUTF16(str2, aResult);
michael@0 115 return NS_OK;
michael@0 116 }
michael@0 117 }
michael@0 118
michael@0 119 if (IsUTF8(str1)) {
michael@0 120 CopyUTF8toUTF16(str1, aResult);
michael@0 121 return NS_OK;
michael@0 122 }
michael@0 123
michael@0 124 if (aTryLocaleCharset && !NS_IsNativeUTF8())
michael@0 125 return NS_CopyNativeToUnicode(str1, aResult);
michael@0 126
michael@0 127 CopyASCIItoUTF16(str1, aResult);
michael@0 128 return NS_OK;
michael@0 129 }
michael@0 130
// Strip the backslashes of quoted-pairs ("\x" -> "x") from a quoted-string.
// The buffer is rewritten in place and can only get shorter. A backslash that
// is the very last character has nothing to escape and is kept as is.
void RemoveQuotedStringEscapes(char *src)
{
  const char *reader = src;
  char *writer = src;

  while (*reader != '\0') {
    if (*reader == '\\' && reader[1] != '\0') {
      // drop the backslash, keep the character it escapes
      ++reader;
    }
    *writer++ = *reader++;
  }
  *writer = '\0';
}
michael@0 148
// true if the character is an ASCII hex digit (0-9, a-f, A-F)
bool IsHexDigit(char aChar)
{
  return (aChar >= '0' && aChar <= '9') ||
         (aChar >= 'a' && aChar <= 'f') ||
         (aChar >= 'A' && aChar <= 'F');
}

// Validate that the first |len| bytes of aValue are syntactically valid with
// respect to %-escapes: every '%' must be followed by two hex digits.
//
// Both hex digits have to lie inside the [0, len) range. An escape that is
// cut off by the end of the range is rejected, and no byte at or beyond
// aValue[len] is ever read.
//
// Returns true when all escapes are well-formed (including when there are
// none, or when len <= 0), false otherwise.
bool IsValidPercentEscaped(const char *aValue, int32_t len)
{
  for (int32_t i = 0; i < len; i++) {
    if (aValue[i] != '%') {
      continue;
    }

    // '%' plus two digits need three bytes starting at i
    if (len - i < 3 ||
        !IsHexDigit(aValue[i + 1]) || !IsHexDigit(aValue[i + 2])) {
      return false;
    }
  }
  return true;
}
michael@0 171
michael@0 172 // Support for continuations (RFC 2231, Section 3)
michael@0 173
michael@0 174 // only a sane number supported
michael@0 175 #define MAX_CONTINUATIONS 999
michael@0 176
michael@0 177 // part of a continuation
michael@0 178
// One segment of an RFC 2231 continuation. The segment does not own its
// text: |value| points at |length| bytes stored elsewhere.
class Continuation {
  public:
    // Describe a segment of aLength bytes starting at aValue.
    Continuation(const char *aValue, uint32_t aLength,
                 bool aNeedsPercentDecoding, bool aWasQuotedString)
      : value(aValue)
      , length(aLength)
      , needsPercentDecoding(aNeedsPercentDecoding)
      , wasQuotedString(aWasQuotedString)
    {
    }

    // Empty segment (null value, zero length); a default constructor is
    // needed for nsTArray.
    Continuation()
      : value(nullptr)
      , length(0)
      , needsPercentDecoding(false)
      , wasQuotedString(false)
    {
    }

    ~Continuation() {}

    const char *value;          // start of the segment text, null if unset
    uint32_t length;            // number of bytes at |value|
    bool needsPercentDecoding;  // segment text is %-escaped
    bool wasQuotedString;       // segment text came from a quoted-string
};
michael@0 202
michael@0 203 // combine segments into a single string, returning the allocated string
michael@0 204 // (or nullptr) while emptying the list
michael@0 205 char *combineContinuations(nsTArray<Continuation>& aArray)
michael@0 206 {
michael@0 207 // Sanity check
michael@0 208 if (aArray.Length() == 0)
michael@0 209 return nullptr;
michael@0 210
michael@0 211 // Get an upper bound for the length
michael@0 212 uint32_t length = 0;
michael@0 213 for (uint32_t i = 0; i < aArray.Length(); i++) {
michael@0 214 length += aArray[i].length;
michael@0 215 }
michael@0 216
michael@0 217 // Allocate
michael@0 218 char *result = (char *) nsMemory::Alloc(length + 1);
michael@0 219
michael@0 220 // Concatenate
michael@0 221 if (result) {
michael@0 222 *result = '\0';
michael@0 223
michael@0 224 for (uint32_t i = 0; i < aArray.Length(); i++) {
michael@0 225 Continuation cont = aArray[i];
michael@0 226 if (! cont.value) break;
michael@0 227
michael@0 228 char *c = result + strlen(result);
michael@0 229 strncat(result, cont.value, cont.length);
michael@0 230 if (cont.needsPercentDecoding) {
michael@0 231 nsUnescape(c);
michael@0 232 }
michael@0 233 if (cont.wasQuotedString) {
michael@0 234 RemoveQuotedStringEscapes(c);
michael@0 235 }
michael@0 236 }
michael@0 237
michael@0 238 // return null if empty value
michael@0 239 if (*result == '\0') {
michael@0 240 nsMemory::Free(result);
michael@0 241 result = nullptr;
michael@0 242 }
michael@0 243 } else {
michael@0 244 // Handle OOM
michael@0 245 NS_WARNING("Out of memory\n");
michael@0 246 }
michael@0 247
michael@0 248 return result;
michael@0 249 }
michael@0 250
michael@0 251 // add a continuation, return false on error if segment already has been seen
michael@0 252 bool addContinuation(nsTArray<Continuation>& aArray, uint32_t aIndex,
michael@0 253 const char *aValue, uint32_t aLength,
michael@0 254 bool aNeedsPercentDecoding, bool aWasQuotedString)
michael@0 255 {
michael@0 256 if (aIndex < aArray.Length() && aArray[aIndex].value) {
michael@0 257 NS_WARNING("duplicate RC2231 continuation segment #\n");
michael@0 258 return false;
michael@0 259 }
michael@0 260
michael@0 261 if (aIndex > MAX_CONTINUATIONS) {
michael@0 262 NS_WARNING("RC2231 continuation segment # exceeds limit\n");
michael@0 263 return false;
michael@0 264 }
michael@0 265
michael@0 266 if (aNeedsPercentDecoding && aWasQuotedString) {
michael@0 267 NS_WARNING("RC2231 continuation segment can't use percent encoding and quoted string form at the same time\n");
michael@0 268 return false;
michael@0 269 }
michael@0 270
michael@0 271 Continuation cont(aValue, aLength, aNeedsPercentDecoding, aWasQuotedString);
michael@0 272
michael@0 273 if (aArray.Length() <= aIndex) {
michael@0 274 aArray.SetLength(aIndex + 1);
michael@0 275 }
michael@0 276 aArray[aIndex] = cont;
michael@0 277
michael@0 278 return true;
michael@0 279 }
michael@0 280
michael@0 281 // parse a segment number; return -1 on error
michael@0 282 int32_t parseSegmentNumber(const char *aValue, int32_t aLen)
michael@0 283 {
michael@0 284 if (aLen < 1) {
michael@0 285 NS_WARNING("segment number missing\n");
michael@0 286 return -1;
michael@0 287 }
michael@0 288
michael@0 289 if (aLen > 1 && aValue[0] == '0') {
michael@0 290 NS_WARNING("leading '0' not allowed in segment number\n");
michael@0 291 return -1;
michael@0 292 }
michael@0 293
michael@0 294 int32_t segmentNumber = 0;
michael@0 295
michael@0 296 for (int32_t i = 0; i < aLen; i++) {
michael@0 297 if (! (aValue[i] >= '0' && aValue[i] <= '9')) {
michael@0 298 NS_WARNING("invalid characters in segment number\n");
michael@0 299 return -1;
michael@0 300 }
michael@0 301
michael@0 302 segmentNumber *= 10;
michael@0 303 segmentNumber += aValue[i] - '0';
michael@0 304 if (segmentNumber > MAX_CONTINUATIONS) {
michael@0 305 NS_WARNING("Segment number exceeds sane size\n");
michael@0 306 return -1;
michael@0 307 }
michael@0 308 }
michael@0 309
michael@0 310 return segmentNumber;
michael@0 311 }
michael@0 312
michael@0 313 // validate a given octet sequence for compliance with the specified
michael@0 314 // encoding
michael@0 315 bool IsValidOctetSequenceForCharset(nsACString& aCharset, const char *aOctets)
michael@0 316 {
michael@0 317 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService
michael@0 318 (NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0 319 if (!cvtUTF8) {
michael@0 320 NS_WARNING("Can't get UTF8ConverterService\n");
michael@0 321 return false;
michael@0 322 }
michael@0 323
michael@0 324 nsAutoCString tmpRaw;
michael@0 325 tmpRaw.Assign(aOctets);
michael@0 326 nsAutoCString tmpDecoded;
michael@0 327
michael@0 328 nsresult rv = cvtUTF8->ConvertStringToUTF8(tmpRaw,
michael@0 329 PromiseFlatCString(aCharset).get(),
michael@0 330 false, false, 1, tmpDecoded);
michael@0 331
michael@0 332 if (rv != NS_OK) {
michael@0 333 // we can't decode; charset may be unsupported, or the octet sequence
michael@0 334 // is broken (illegal or incomplete octet sequence contained)
michael@0 335 NS_WARNING("RFC2231/5987 parameter value does not decode according to specified charset\n");
michael@0 336 return false;
michael@0 337 }
michael@0 338
michael@0 339 return true;
michael@0 340 }
michael@0 341
michael@0 342 // moved almost verbatim from mimehdrs.cpp
michael@0 343 // char *
michael@0 344 // MimeHeaders_get_parameter (const char *header_value, const char *parm_name,
michael@0 345 // char **charset, char **language)
michael@0 346 //
michael@0 347 // The format of these header lines is
michael@0 348 // <token> [ ';' <token> '=' <token-or-quoted-string> ]*
michael@0 349 NS_IMETHODIMP
michael@0 350 nsMIMEHeaderParamImpl::GetParameterInternal(const char *aHeaderValue,
michael@0 351 const char *aParamName,
michael@0 352 char **aCharset,
michael@0 353 char **aLang,
michael@0 354 char **aResult)
michael@0 355 {
michael@0 356 return DoParameterInternal(aHeaderValue, aParamName, MIME_FIELD_ENCODING,
michael@0 357 aCharset, aLang, aResult);
michael@0 358 }
michael@0 359
michael@0 360
michael@0 361 nsresult
michael@0 362 nsMIMEHeaderParamImpl::DoParameterInternal(const char *aHeaderValue,
michael@0 363 const char *aParamName,
michael@0 364 ParamDecoding aDecoding,
michael@0 365 char **aCharset,
michael@0 366 char **aLang,
michael@0 367 char **aResult)
michael@0 368 {
michael@0 369
michael@0 370 if (!aHeaderValue || !*aHeaderValue || !aResult)
michael@0 371 return NS_ERROR_INVALID_ARG;
michael@0 372
michael@0 373 *aResult = nullptr;
michael@0 374
michael@0 375 if (aCharset) *aCharset = nullptr;
michael@0 376 if (aLang) *aLang = nullptr;
michael@0 377
michael@0 378 nsAutoCString charset;
michael@0 379
michael@0 380 // change to (aDecoding != HTTP_FIELD_ENCODING) when we want to disable
michael@0 381 // them for HTTP header fields later on, see bug 776324
michael@0 382 bool acceptContinuations = true;
michael@0 383
michael@0 384 const char *str = aHeaderValue;
michael@0 385
michael@0 386 // skip leading white space.
michael@0 387 for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
michael@0 388 ;
michael@0 389 const char *start = str;
michael@0 390
michael@0 391 // aParamName is empty. return the first (possibly) _unnamed_ 'parameter'
michael@0 392 // For instance, return 'inline' in the following case:
michael@0 393 // Content-Disposition: inline; filename=.....
michael@0 394 if (!aParamName || !*aParamName)
michael@0 395 {
michael@0 396 for (; *str && *str != ';' && !nsCRT::IsAsciiSpace(*str); ++str)
michael@0 397 ;
michael@0 398 if (str == start)
michael@0 399 return NS_ERROR_FIRST_HEADER_FIELD_COMPONENT_EMPTY;
michael@0 400
michael@0 401 *aResult = (char *) nsMemory::Clone(start, (str - start) + 1);
michael@0 402 NS_ENSURE_TRUE(*aResult, NS_ERROR_OUT_OF_MEMORY);
michael@0 403 (*aResult)[str - start] = '\0'; // null-terminate
michael@0 404 return NS_OK;
michael@0 405 }
michael@0 406
michael@0 407 /* Skip forward to first ';' */
michael@0 408 for (; *str && *str != ';' && *str != ','; ++str)
michael@0 409 ;
michael@0 410 if (*str)
michael@0 411 str++;
michael@0 412 /* Skip over following whitespace */
michael@0 413 for (; *str && nsCRT::IsAsciiSpace(*str); ++str)
michael@0 414 ;
michael@0 415
michael@0 416 // Some broken http servers just specify parameters
michael@0 417 // like 'filename' without specifying disposition
michael@0 418 // method. Rewind to the first non-white-space
michael@0 419 // character.
michael@0 420
michael@0 421 if (!*str)
michael@0 422 str = start;
michael@0 423
michael@0 424 // RFC2231 - The legitimate parm format can be:
michael@0 425 // A. title=ThisIsTitle
michael@0 426 // B. title*=us-ascii'en-us'This%20is%20wierd.
michael@0 427 // C. title*0*=us-ascii'en'This%20is%20wierd.%20We
michael@0 428 // title*1*=have%20to%20support%20this.
michael@0 429 // title*2="Else..."
michael@0 430 // D. title*0="Hey, what you think you are doing?"
michael@0 431 // title*1="There is no charset and lang info."
michael@0 432 // RFC5987: only A and B
michael@0 433
michael@0 434 // collect results for the different algorithms (plain filename,
michael@0 435 // RFC5987/2231-encoded filename, + continuations) separately and decide
michael@0 436 // which to use at the end
michael@0 437 char *caseAResult = nullptr;
michael@0 438 char *caseBResult = nullptr;
michael@0 439 char *caseCDResult = nullptr;
michael@0 440
michael@0 441 // collect continuation segments
michael@0 442 nsTArray<Continuation> segments;
michael@0 443
michael@0 444
michael@0 445 // our copies of the charset parameter, kept separately as they might
michael@0 446 // differ for the two formats
michael@0 447 nsDependentCSubstring charsetB, charsetCD;
michael@0 448
michael@0 449 nsDependentCSubstring lang;
michael@0 450
michael@0 451 int32_t paramLen = strlen(aParamName);
michael@0 452
michael@0 453 while (*str) {
michael@0 454 // find name/value
michael@0 455
michael@0 456 const char *nameStart = str;
michael@0 457 const char *nameEnd = nullptr;
michael@0 458 const char *valueStart = str;
michael@0 459 const char *valueEnd = nullptr;
michael@0 460 bool isQuotedString = false;
michael@0 461
michael@0 462 NS_ASSERTION(!nsCRT::IsAsciiSpace(*str), "should be after whitespace.");
michael@0 463
michael@0 464 // Skip forward to the end of this token.
michael@0 465 for (; *str && !nsCRT::IsAsciiSpace(*str) && *str != '=' && *str != ';'; str++)
michael@0 466 ;
michael@0 467 nameEnd = str;
michael@0 468
michael@0 469 int32_t nameLen = nameEnd - nameStart;
michael@0 470
michael@0 471 // Skip over whitespace, '=', and whitespace
michael@0 472 while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0 473 if (!*str) {
michael@0 474 break;
michael@0 475 }
michael@0 476 if (*str++ != '=') {
michael@0 477 // don't accept parameters without "="
michael@0 478 goto increment_str;
michael@0 479 }
michael@0 480 while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0 481
michael@0 482 if (*str != '"') {
michael@0 483 // The value is a token, not a quoted string.
michael@0 484 valueStart = str;
michael@0 485 for (valueEnd = str;
michael@0 486 *valueEnd && !nsCRT::IsAsciiSpace (*valueEnd) && *valueEnd != ';';
michael@0 487 valueEnd++)
michael@0 488 ;
michael@0 489 str = valueEnd;
michael@0 490 } else {
michael@0 491 isQuotedString = true;
michael@0 492
michael@0 493 ++str;
michael@0 494 valueStart = str;
michael@0 495 for (valueEnd = str; *valueEnd; ++valueEnd) {
michael@0 496 if (*valueEnd == '\\' && *(valueEnd + 1))
michael@0 497 ++valueEnd;
michael@0 498 else if (*valueEnd == '"')
michael@0 499 break;
michael@0 500 }
michael@0 501 str = valueEnd;
michael@0 502 // *valueEnd != null means that *valueEnd is quote character.
michael@0 503 if (*valueEnd)
michael@0 504 str++;
michael@0 505 }
michael@0 506
michael@0 507 // See if this is the simplest case (case A above),
michael@0 508 // a 'single' line value with no charset and lang.
michael@0 509 // If so, copy it and return.
michael@0 510 if (nameLen == paramLen &&
michael@0 511 !nsCRT::strncasecmp(nameStart, aParamName, paramLen)) {
michael@0 512
michael@0 513 if (caseAResult) {
michael@0 514 // we already have one caseA result, ignore subsequent ones
michael@0 515 goto increment_str;
michael@0 516 }
michael@0 517
michael@0 518 // if the parameter spans across multiple lines we have to strip out the
michael@0 519 // line continuation -- jht 4/29/98
michael@0 520 nsAutoCString tempStr(valueStart, valueEnd - valueStart);
michael@0 521 tempStr.StripChars("\r\n");
michael@0 522 char *res = ToNewCString(tempStr);
michael@0 523 NS_ENSURE_TRUE(res, NS_ERROR_OUT_OF_MEMORY);
michael@0 524
michael@0 525 if (isQuotedString)
michael@0 526 RemoveQuotedStringEscapes(res);
michael@0 527
michael@0 528 caseAResult = res;
michael@0 529 // keep going, we may find a RFC 2231/5987 encoded alternative
michael@0 530 }
michael@0 531 // case B, C, and D
michael@0 532 else if (nameLen > paramLen &&
michael@0 533 !nsCRT::strncasecmp(nameStart, aParamName, paramLen) &&
michael@0 534 *(nameStart + paramLen) == '*') {
michael@0 535
michael@0 536 // 1st char past '*'
michael@0 537 const char *cp = nameStart + paramLen + 1;
michael@0 538
michael@0 539 // if param name ends in "*" we need do to RFC5987 "ext-value" decoding
michael@0 540 bool needExtDecoding = *(nameEnd - 1) == '*';
michael@0 541
michael@0 542 bool caseB = nameLen == paramLen + 1;
michael@0 543 bool caseCStart = (*cp == '0') && needExtDecoding;
michael@0 544
michael@0 545 // parse the segment number
michael@0 546 int32_t segmentNumber = -1;
michael@0 547 if (!caseB) {
michael@0 548 int32_t segLen = (nameEnd - cp) - (needExtDecoding ? 1 : 0);
michael@0 549 segmentNumber = parseSegmentNumber(cp, segLen);
michael@0 550
michael@0 551 if (segmentNumber == -1) {
michael@0 552 acceptContinuations = false;
michael@0 553 goto increment_str;
michael@0 554 }
michael@0 555 }
michael@0 556
michael@0 557 // CaseB and start of CaseC: requires charset and optional language
michael@0 558 // in quotes (quotes required even if lang is blank)
michael@0 559 if (caseB || (caseCStart && acceptContinuations)) {
michael@0 560 // look for single quotation mark(')
michael@0 561 const char *sQuote1 = PL_strchr(valueStart, 0x27);
michael@0 562 const char *sQuote2 = sQuote1 ? PL_strchr(sQuote1 + 1, 0x27) : nullptr;
michael@0 563
michael@0 564 // Two single quotation marks must be present even in
michael@0 565 // absence of charset and lang.
michael@0 566 if (!sQuote1 || !sQuote2) {
michael@0 567 NS_WARNING("Mandatory two single quotes are missing in header parameter\n");
michael@0 568 }
michael@0 569
michael@0 570 const char *charsetStart = nullptr;
michael@0 571 int32_t charsetLength = 0;
michael@0 572 const char *langStart = nullptr;
michael@0 573 int32_t langLength = 0;
michael@0 574 const char *rawValStart = nullptr;
michael@0 575 int32_t rawValLength = 0;
michael@0 576
michael@0 577 if (sQuote2 && sQuote1) {
michael@0 578 // both delimiters present: charSet'lang'rawVal
michael@0 579 rawValStart = sQuote2 + 1;
michael@0 580 rawValLength = valueEnd - rawValStart;
michael@0 581
michael@0 582 langStart = sQuote1 + 1;
michael@0 583 langLength = sQuote2 - langStart;
michael@0 584
michael@0 585 charsetStart = valueStart;
michael@0 586 charsetLength = sQuote1 - charsetStart;
michael@0 587 }
michael@0 588 else if (sQuote1) {
michael@0 589 // one delimiter; assume charset'rawVal
michael@0 590 rawValStart = sQuote1 + 1;
michael@0 591 rawValLength = valueEnd - rawValStart;
michael@0 592
michael@0 593 charsetStart = valueStart;
michael@0 594 charsetLength = sQuote1 - valueStart;
michael@0 595 }
michael@0 596 else {
michael@0 597 // no delimiter: just rawVal
michael@0 598 rawValStart = valueStart;
michael@0 599 rawValLength = valueEnd - valueStart;
michael@0 600 }
michael@0 601
michael@0 602 if (langLength != 0) {
michael@0 603 lang.Assign(langStart, langLength);
michael@0 604 }
michael@0 605
michael@0 606 // keep the charset for later
michael@0 607 if (caseB) {
michael@0 608 charsetB.Assign(charsetStart, charsetLength);
michael@0 609 } else {
michael@0 610 // if caseCorD
michael@0 611 charsetCD.Assign(charsetStart, charsetLength);
michael@0 612 }
michael@0 613
michael@0 614 // non-empty value part
michael@0 615 if (rawValLength > 0) {
michael@0 616 if (!caseBResult && caseB) {
michael@0 617 if (!IsValidPercentEscaped(rawValStart, rawValLength)) {
michael@0 618 goto increment_str;
michael@0 619 }
michael@0 620
michael@0 621 // allocate buffer for the raw value
michael@0 622 char *tmpResult = (char *) nsMemory::Clone(rawValStart, rawValLength + 1);
michael@0 623 if (!tmpResult) {
michael@0 624 goto increment_str;
michael@0 625 }
michael@0 626 *(tmpResult + rawValLength) = 0;
michael@0 627
michael@0 628 nsUnescape(tmpResult);
michael@0 629 caseBResult = tmpResult;
michael@0 630 } else {
michael@0 631 // caseC
michael@0 632 bool added = addContinuation(segments, 0, rawValStart,
michael@0 633 rawValLength, needExtDecoding,
michael@0 634 isQuotedString);
michael@0 635
michael@0 636 if (!added) {
michael@0 637 // continuation not added, stop processing them
michael@0 638 acceptContinuations = false;
michael@0 639 }
michael@0 640 }
michael@0 641 }
michael@0 642 } // end of if-block : title*0*= or title*=
michael@0 643 // caseD: a line of multiline param with no need for unescaping : title*[0-9]=
michael@0 644 // or 2nd or later lines of a caseC param : title*[1-9]*=
michael@0 645 else if (acceptContinuations && segmentNumber != -1) {
michael@0 646 uint32_t valueLength = valueEnd - valueStart;
michael@0 647
michael@0 648 bool added = addContinuation(segments, segmentNumber, valueStart,
michael@0 649 valueLength, needExtDecoding,
michael@0 650 isQuotedString);
michael@0 651
michael@0 652 if (!added) {
michael@0 653 // continuation not added, stop processing them
michael@0 654 acceptContinuations = false;
michael@0 655 }
michael@0 656 } // end of if-block : title*[0-9]= or title*[1-9]*=
michael@0 657 }
michael@0 658
michael@0 659 // str now points after the end of the value.
michael@0 660 // skip over whitespace, ';', whitespace.
michael@0 661 increment_str:
michael@0 662 while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0 663 if (*str == ';') {
michael@0 664 ++str;
michael@0 665 } else {
michael@0 666 // stop processing the header field; either we are done or the
michael@0 667 // separator was missing
michael@0 668 break;
michael@0 669 }
michael@0 670 while (nsCRT::IsAsciiSpace(*str)) ++str;
michael@0 671 }
michael@0 672
michael@0 673 caseCDResult = combineContinuations(segments);
michael@0 674
michael@0 675 if (caseBResult && !charsetB.IsEmpty()) {
michael@0 676 // check that the 2231/5987 result decodes properly given the
michael@0 677 // specified character set
michael@0 678 if (!IsValidOctetSequenceForCharset(charsetB, caseBResult))
michael@0 679 caseBResult = nullptr;
michael@0 680 }
michael@0 681
michael@0 682 if (caseCDResult && !charsetCD.IsEmpty()) {
michael@0 683 // check that the 2231/5987 result decodes properly given the
michael@0 684 // specified character set
michael@0 685 if (!IsValidOctetSequenceForCharset(charsetCD, caseCDResult))
michael@0 686 caseCDResult = nullptr;
michael@0 687 }
michael@0 688
michael@0 689 if (caseBResult) {
michael@0 690 // prefer simple 5987 format over 2231 with continuations
michael@0 691 *aResult = caseBResult;
michael@0 692 caseBResult = nullptr;
michael@0 693 charset.Assign(charsetB);
michael@0 694 }
michael@0 695 else if (caseCDResult) {
michael@0 696 // prefer 2231/5987 with or without continuations over plain format
michael@0 697 *aResult = caseCDResult;
michael@0 698 caseCDResult = nullptr;
michael@0 699 charset.Assign(charsetCD);
michael@0 700 }
michael@0 701 else if (caseAResult) {
michael@0 702 *aResult = caseAResult;
michael@0 703 caseAResult = nullptr;
michael@0 704 }
michael@0 705
michael@0 706 // free unused stuff
michael@0 707 nsMemory::Free(caseAResult);
michael@0 708 nsMemory::Free(caseBResult);
michael@0 709 nsMemory::Free(caseCDResult);
michael@0 710
michael@0 711 // if we have a result
michael@0 712 if (*aResult) {
michael@0 713 // then return charset and lang as well
michael@0 714 if (aLang && !lang.IsEmpty()) {
michael@0 715 uint32_t len = lang.Length();
michael@0 716 *aLang = (char *) nsMemory::Clone(lang.BeginReading(), len + 1);
michael@0 717 if (*aLang) {
michael@0 718 *(*aLang + len) = 0;
michael@0 719 }
michael@0 720 }
michael@0 721 if (aCharset && !charset.IsEmpty()) {
michael@0 722 uint32_t len = charset.Length();
michael@0 723 *aCharset = (char *) nsMemory::Clone(charset.BeginReading(), len + 1);
michael@0 724 if (*aCharset) {
michael@0 725 *(*aCharset + len) = 0;
michael@0 726 }
michael@0 727 }
michael@0 728 }
michael@0 729
michael@0 730 return *aResult ? NS_OK : NS_ERROR_INVALID_ARG;
michael@0 731 }
michael@0 732
michael@0 733 nsresult
michael@0 734 internalDecodeRFC2047Header(const char* aHeaderVal, const char* aDefaultCharset,
michael@0 735 bool aOverrideCharset, bool aEatContinuations,
michael@0 736 nsACString& aResult)
michael@0 737 {
michael@0 738 aResult.Truncate();
michael@0 739 if (!aHeaderVal)
michael@0 740 return NS_ERROR_INVALID_ARG;
michael@0 741 if (!*aHeaderVal)
michael@0 742 return NS_OK;
michael@0 743
michael@0 744
michael@0 745 // If aHeaderVal is RFC 2047 encoded or is not a UTF-8 string but
michael@0 746 // aDefaultCharset is specified, decodes RFC 2047 encoding and converts
michael@0 747 // to UTF-8. Otherwise, just strips away CRLF.
michael@0 748 if (PL_strstr(aHeaderVal, "=?") ||
michael@0 749 (aDefaultCharset && (!IsUTF8(nsDependentCString(aHeaderVal)) ||
michael@0 750 Is7bitNonAsciiString(aHeaderVal, strlen(aHeaderVal))))) {
michael@0 751 DecodeRFC2047Str(aHeaderVal, aDefaultCharset, aOverrideCharset, aResult);
michael@0 752 } else if (aEatContinuations &&
michael@0 753 (PL_strchr(aHeaderVal, '\n') || PL_strchr(aHeaderVal, '\r'))) {
michael@0 754 aResult = aHeaderVal;
michael@0 755 } else {
michael@0 756 aEatContinuations = false;
michael@0 757 aResult = aHeaderVal;
michael@0 758 }
michael@0 759
michael@0 760 if (aEatContinuations) {
michael@0 761 nsAutoCString temp(aResult);
michael@0 762 temp.ReplaceSubstring("\n\t", " ");
michael@0 763 temp.ReplaceSubstring("\r\t", " ");
michael@0 764 temp.StripChars("\r\n");
michael@0 765 aResult = temp;
michael@0 766 }
michael@0 767
michael@0 768 return NS_OK;
michael@0 769 }
michael@0 770
michael@0 771 NS_IMETHODIMP
michael@0 772 nsMIMEHeaderParamImpl::DecodeRFC2047Header(const char* aHeaderVal,
michael@0 773 const char* aDefaultCharset,
michael@0 774 bool aOverrideCharset,
michael@0 775 bool aEatContinuations,
michael@0 776 nsACString& aResult)
michael@0 777 {
michael@0 778 return internalDecodeRFC2047Header(aHeaderVal, aDefaultCharset,
michael@0 779 aOverrideCharset, aEatContinuations,
michael@0 780 aResult);
michael@0 781 }
michael@0 782
// true if the character may appear unescaped in an RFC 5987 value, i.e. it
// is an ASCII letter, an ASCII digit, or one of  ! # $ & + - . ^ _ ` | ~
// see RFC 5987, Section 3.2.1, "attr-char"
bool IsRFC5987AttrChar(char aChar)
{
  const bool isLetter = (aChar >= 'a' && aChar <= 'z') ||
                        (aChar >= 'A' && aChar <= 'Z');
  const bool isDigit = aChar >= '0' && aChar <= '9';
  if (isLetter || isDigit) {
    return true;
  }

  switch (aChar) {
    case '!': case '#': case '$': case '&':
    case '+': case '-': case '.': case '^':
    case '_': case '`': case '|': case '~':
      return true;
    default:
      return false;
  }
}
michael@0 796
michael@0 797 // percent-decode a value
michael@0 798 // returns false on failure
michael@0 799 bool PercentDecode(nsACString& aValue)
michael@0 800 {
michael@0 801 char *c = (char *) nsMemory::Alloc(aValue.Length() + 1);
michael@0 802 if (!c) {
michael@0 803 return false;
michael@0 804 }
michael@0 805
michael@0 806 strcpy(c, PromiseFlatCString(aValue).get());
michael@0 807 nsUnescape(c);
michael@0 808 aValue.Assign(c);
michael@0 809 nsMemory::Free(c);
michael@0 810
michael@0 811 return true;
michael@0 812 }
michael@0 813
michael@0 814 // Decode a parameter value using the encoding defined in RFC 5987
michael@0 815 //
michael@0 816 // charset "'" [ language ] "'" value-chars
michael@0 817 NS_IMETHODIMP
michael@0 818 nsMIMEHeaderParamImpl::DecodeRFC5987Param(const nsACString& aParamVal,
michael@0 819 nsACString& aLang,
michael@0 820 nsAString& aResult)
michael@0 821 {
michael@0 822 nsAutoCString charset;
michael@0 823 nsAutoCString language;
michael@0 824 nsAutoCString value;
michael@0 825
michael@0 826 uint32_t delimiters = 0;
michael@0 827 const char *encoded = PromiseFlatCString(aParamVal).get();
michael@0 828 const char *c = encoded;
michael@0 829
michael@0 830 while (*c) {
michael@0 831 char tc = *c++;
michael@0 832
michael@0 833 if (tc == '\'') {
michael@0 834 // single quote
michael@0 835 delimiters++;
michael@0 836 } else if (((unsigned char)tc) >= 128) {
michael@0 837 // fail early, not ASCII
michael@0 838 NS_WARNING("non-US-ASCII character in RFC5987-encoded param");
michael@0 839 return NS_ERROR_INVALID_ARG;
michael@0 840 } else {
michael@0 841 if (delimiters == 0) {
michael@0 842 // valid characters are checked later implicitly
michael@0 843 charset.Append(tc);
michael@0 844 } else if (delimiters == 1) {
michael@0 845 // no value checking for now
michael@0 846 language.Append(tc);
michael@0 847 } else if (delimiters == 2) {
michael@0 848 if (IsRFC5987AttrChar(tc)) {
michael@0 849 value.Append(tc);
michael@0 850 } else if (tc == '%') {
michael@0 851 if (!IsHexDigit(c[0]) || !IsHexDigit(c[1])) {
michael@0 852 // we expect two more characters
michael@0 853 NS_WARNING("broken %-escape in RFC5987-encoded param");
michael@0 854 return NS_ERROR_INVALID_ARG;
michael@0 855 }
michael@0 856 value.Append(tc);
michael@0 857 // we consume two more
michael@0 858 value.Append(*c++);
michael@0 859 value.Append(*c++);
michael@0 860 } else {
michael@0 861 // character not allowed here
michael@0 862 NS_WARNING("invalid character in RFC5987-encoded param");
michael@0 863 return NS_ERROR_INVALID_ARG;
michael@0 864 }
michael@0 865 }
michael@0 866 }
michael@0 867 }
michael@0 868
michael@0 869 if (delimiters != 2) {
michael@0 870 NS_WARNING("missing delimiters in RFC5987-encoded param");
michael@0 871 return NS_ERROR_INVALID_ARG;
michael@0 872 }
michael@0 873
michael@0 874 // abort early for unsupported encodings
michael@0 875 if (!charset.LowerCaseEqualsLiteral("utf-8")) {
michael@0 876 NS_WARNING("unsupported charset in RFC5987-encoded param");
michael@0 877 return NS_ERROR_INVALID_ARG;
michael@0 878 }
michael@0 879
michael@0 880 // percent-decode
michael@0 881 if (!PercentDecode(value)) {
michael@0 882 return NS_ERROR_OUT_OF_MEMORY;
michael@0 883 }
michael@0 884
michael@0 885 // return the encoding
michael@0 886 aLang.Assign(language);
michael@0 887
michael@0 888 // finally convert octet sequence to UTF-8 and be done
michael@0 889 nsresult rv = NS_OK;
michael@0 890 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8 =
michael@0 891 do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv);
michael@0 892 NS_ENSURE_SUCCESS(rv, rv);
michael@0 893
michael@0 894 nsAutoCString utf8;
michael@0 895 rv = cvtUTF8->ConvertStringToUTF8(value, charset.get(), true, false, 1, utf8);
michael@0 896 NS_ENSURE_SUCCESS(rv, rv);
michael@0 897
michael@0 898 CopyUTF8toUTF16(utf8, aResult);
michael@0 899 return NS_OK;
michael@0 900 }
michael@0 901
michael@0 902 nsresult
michael@0 903 internalDecodeParameter(const nsACString& aParamValue, const char* aCharset,
michael@0 904 const char* aDefaultCharset, bool aOverrideCharset,
michael@0 905 bool aDecode2047, nsACString& aResult)
michael@0 906 {
michael@0 907 aResult.Truncate();
michael@0 908 // If aCharset is given, aParamValue was obtained from RFC2231/5987
michael@0 909 // encoding and we're pretty sure that it's in aCharset.
michael@0 910 if (aCharset && *aCharset)
michael@0 911 {
michael@0 912 nsCOMPtr<nsIUTF8ConverterService> cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0 913 if (cvtUTF8)
michael@0 914 return cvtUTF8->ConvertStringToUTF8(aParamValue, aCharset,
michael@0 915 true, true, 1, aResult);
michael@0 916 }
michael@0 917
michael@0 918 const nsAFlatCString& param = PromiseFlatCString(aParamValue);
michael@0 919 nsAutoCString unQuoted;
michael@0 920 nsACString::const_iterator s, e;
michael@0 921 param.BeginReading(s);
michael@0 922 param.EndReading(e);
michael@0 923
michael@0 924 // strip '\' when used to quote CR, LF, '"' and '\'
michael@0 925 for ( ; s != e; ++s) {
michael@0 926 if ((*s == '\\')) {
michael@0 927 if (++s == e) {
michael@0 928 --s; // '\' is at the end. move back and append '\'.
michael@0 929 }
michael@0 930 else if (*s != nsCRT::CR && *s != nsCRT::LF && *s != '"' && *s != '\\') {
michael@0 931 --s; // '\' is not foll. by CR,LF,'"','\'. move back and append '\'
michael@0 932 }
michael@0 933 // else : skip '\' and append the quoted character.
michael@0 934 }
michael@0 935 unQuoted.Append(*s);
michael@0 936 }
michael@0 937
michael@0 938 aResult = unQuoted;
michael@0 939 nsresult rv = NS_OK;
michael@0 940
michael@0 941 if (aDecode2047) {
michael@0 942 nsAutoCString decoded;
michael@0 943
michael@0 944 // Try RFC 2047 encoding, instead.
michael@0 945 rv = internalDecodeRFC2047Header(unQuoted.get(), aDefaultCharset,
michael@0 946 aOverrideCharset, true, decoded);
michael@0 947
michael@0 948 if (NS_SUCCEEDED(rv) && !decoded.IsEmpty())
michael@0 949 aResult = decoded;
michael@0 950 }
michael@0 951
michael@0 952 return rv;
michael@0 953 }
michael@0 954
michael@0 955 NS_IMETHODIMP
michael@0 956 nsMIMEHeaderParamImpl::DecodeParameter(const nsACString& aParamValue,
michael@0 957 const char* aCharset,
michael@0 958 const char* aDefaultCharset,
michael@0 959 bool aOverrideCharset,
michael@0 960 nsACString& aResult)
michael@0 961 {
michael@0 962 return internalDecodeParameter(aParamValue, aCharset, aDefaultCharset,
michael@0 963 aOverrideCharset, true, aResult);
michael@0 964 }
michael@0 965
michael@0 966 #define ISHEXCHAR(c) \
michael@0 967 ((0x30 <= uint8_t(c) && uint8_t(c) <= 0x39) || \
michael@0 968 (0x41 <= uint8_t(c) && uint8_t(c) <= 0x46) || \
michael@0 969 (0x61 <= uint8_t(c) && uint8_t(c) <= 0x66))
michael@0 970
michael@0 971 // Decode Q encoding (RFC 2047).
michael@0 972 // static
michael@0 973 char *DecodeQ(const char *in, uint32_t length)
michael@0 974 {
michael@0 975 char *out, *dest = 0;
michael@0 976
michael@0 977 out = dest = (char *)PR_Calloc(length + 1, sizeof(char));
michael@0 978 if (dest == nullptr)
michael@0 979 return nullptr;
michael@0 980 while (length > 0) {
michael@0 981 unsigned c = 0;
michael@0 982 switch (*in) {
michael@0 983 case '=':
michael@0 984 // check if |in| in the form of '=hh' where h is [0-9a-fA-F].
michael@0 985 if (length < 3 || !ISHEXCHAR(in[1]) || !ISHEXCHAR(in[2]))
michael@0 986 goto badsyntax;
michael@0 987 PR_sscanf(in + 1, "%2X", &c);
michael@0 988 *out++ = (char) c;
michael@0 989 in += 3;
michael@0 990 length -= 3;
michael@0 991 break;
michael@0 992
michael@0 993 case '_':
michael@0 994 *out++ = ' ';
michael@0 995 in++;
michael@0 996 length--;
michael@0 997 break;
michael@0 998
michael@0 999 default:
michael@0 1000 if (*in & 0x80) goto badsyntax;
michael@0 1001 *out++ = *in++;
michael@0 1002 length--;
michael@0 1003 }
michael@0 1004 }
michael@0 1005 *out++ = '\0';
michael@0 1006
michael@0 1007 for (out = dest; *out ; ++out) {
michael@0 1008 if (*out == '\t')
michael@0 1009 *out = ' ';
michael@0 1010 }
michael@0 1011
michael@0 1012 return dest;
michael@0 1013
michael@0 1014 badsyntax:
michael@0 1015 PR_Free(dest);
michael@0 1016 return nullptr;
michael@0 1017 }
michael@0 1018
// check if input is HZ (a 7bit encoding for simplified Chinese : RFC 1842))
// or has ESC which may be an indication that it's in one of many ISO
// 2022 7bit encodings (e.g. ISO-2022-JP(-2)/CN : see RFC 1468, 1922, 1554).
//
// Examines exactly |len| bytes starting at |input|:
//  - returns false as soon as a byte with the high bit set is seen,
//  - returns true as soon as an ESC (0x1B) is seen,
//  - otherwise returns true only if the scan ends right after a complete
//    HZ sequence ("~{ ... ~}" or "~~").
// The byte following a '~' is only inspected when it lies inside the
// |len| bytes; a '~' that is the last byte is treated as if it were followed
// by NUL, so nothing beyond the given length is ever read.
// static
bool Is7bitNonAsciiString(const char *input, uint32_t len)
{
  enum { hz_initial, // No HZ seen yet
         hz_escaped, // Inside an HZ ~{ escape sequence
         hz_seen, // Have seen at least one complete HZ sequence
         hz_notpresent // Have seen something that is not legal HZ
  } hz_state = hz_initial;

  while (len) {
    const int32_t c = uint8_t(*input++);
    len--;
    if (c & 0x80) return false;
    if (c == 0x1B) return true;
    if (c != '~') continue;

    // Bounded look-ahead.  Peeking unconditionally would read one byte past
    // the buffer when '~' is the final byte, and the "~~" branch below would
    // then decrement len from 0 and wrap it around.
    const char next = len ? *input : '\0';

    switch (hz_state) {
      case hz_initial:
      case hz_seen:
        if (next == '{') {
          hz_state = hz_escaped;
        } else if (next == '~') {
          // ~~ is the HZ encoding of ~. Skip over second ~ as well
          hz_state = hz_seen;
          input++;
          len--;
        } else {
          hz_state = hz_notpresent;
        }
        break;

      case hz_escaped:
        if (next == '}') hz_state = hz_seen;
        break;

      default:
        // hz_notpresent is sticky
        break;
    }
  }
  return hz_state == hz_seen;
}
michael@0 1065
michael@0 1066 #define REPLACEMENT_CHAR "\357\277\275" // EF BF BD (UTF-8 encoding of U+FFFD)
michael@0 1067
michael@0 1068 // copy 'raw' sequences of octets in aInput to aOutput.
michael@0 1069 // If aDefaultCharset is specified, the input is assumed to be in the
michael@0 1070 // charset and converted to UTF-8. Otherwise, a blind copy is made.
michael@0 1071 // If aDefaultCharset is specified, but the conversion to UTF-8
michael@0 1072 // is not successful, each octet is replaced by Unicode replacement
michael@0 1073 // chars. *aOutput is advanced by the number of output octets.
michael@0 1074 // static
michael@0 1075 void CopyRawHeader(const char *aInput, uint32_t aLen,
michael@0 1076 const char *aDefaultCharset, nsACString &aOutput)
michael@0 1077 {
michael@0 1078 int32_t c;
michael@0 1079
michael@0 1080 // If aDefaultCharset is not specified, make a blind copy.
michael@0 1081 if (!aDefaultCharset || !*aDefaultCharset) {
michael@0 1082 aOutput.Append(aInput, aLen);
michael@0 1083 return;
michael@0 1084 }
michael@0 1085
michael@0 1086 // Copy as long as it's US-ASCII. An ESC may indicate ISO 2022
michael@0 1087 // A ~ may indicate it is HZ
michael@0 1088 while (aLen && (c = uint8_t(*aInput++)) != 0x1B && c != '~' && !(c & 0x80)) {
michael@0 1089 aOutput.Append(char(c));
michael@0 1090 aLen--;
michael@0 1091 }
michael@0 1092 if (!aLen) {
michael@0 1093 return;
michael@0 1094 }
michael@0 1095 aInput--;
michael@0 1096
michael@0 1097 // skip ASCIIness/UTF8ness test if aInput is supected to be a 7bit non-ascii
michael@0 1098 // string and aDefaultCharset is a 7bit non-ascii charset.
michael@0 1099 bool skipCheck = (c == 0x1B || c == '~') &&
michael@0 1100 IS_7BIT_NON_ASCII_CHARSET(aDefaultCharset);
michael@0 1101
michael@0 1102 // If not UTF-8, treat as default charset
michael@0 1103 nsCOMPtr<nsIUTF8ConverterService>
michael@0 1104 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID));
michael@0 1105 nsAutoCString utf8Text;
michael@0 1106 if (cvtUTF8 &&
michael@0 1107 NS_SUCCEEDED(
michael@0 1108 cvtUTF8->ConvertStringToUTF8(Substring(aInput, aInput + aLen),
michael@0 1109 aDefaultCharset, skipCheck, true, 1,
michael@0 1110 utf8Text))) {
michael@0 1111 aOutput.Append(utf8Text);
michael@0 1112 } else { // replace each octet with Unicode replacement char in UTF-8.
michael@0 1113 for (uint32_t i = 0; i < aLen; i++) {
michael@0 1114 c = uint8_t(*aInput++);
michael@0 1115 if (c & 0x80)
michael@0 1116 aOutput.Append(REPLACEMENT_CHAR);
michael@0 1117 else
michael@0 1118 aOutput.Append(char(c));
michael@0 1119 }
michael@0 1120 }
michael@0 1121 }
michael@0 1122
michael@0 1123 nsresult DecodeQOrBase64Str(const char *aEncoded, size_t aLen, char aQOrBase64,
michael@0 1124 const char *aCharset, nsACString &aResult)
michael@0 1125 {
michael@0 1126 char *decodedText;
michael@0 1127 NS_ASSERTION(aQOrBase64 == 'Q' || aQOrBase64 == 'B', "Should be 'Q' or 'B'");
michael@0 1128 if(aQOrBase64 == 'Q')
michael@0 1129 decodedText = DecodeQ(aEncoded, aLen);
michael@0 1130 else if (aQOrBase64 == 'B') {
michael@0 1131 decodedText = PL_Base64Decode(aEncoded, aLen, nullptr);
michael@0 1132 } else {
michael@0 1133 return NS_ERROR_INVALID_ARG;
michael@0 1134 }
michael@0 1135
michael@0 1136 if (!decodedText) {
michael@0 1137 return NS_ERROR_INVALID_ARG;
michael@0 1138 }
michael@0 1139
michael@0 1140 nsresult rv;
michael@0 1141 nsCOMPtr<nsIUTF8ConverterService>
michael@0 1142 cvtUTF8(do_GetService(NS_UTF8CONVERTERSERVICE_CONTRACTID, &rv));
michael@0 1143 nsAutoCString utf8Text;
michael@0 1144 if (NS_SUCCEEDED(rv)) {
michael@0 1145 // skip ASCIIness/UTF8ness test if aCharset is 7bit non-ascii charset.
michael@0 1146 rv = cvtUTF8->ConvertStringToUTF8(nsDependentCString(decodedText),
michael@0 1147 aCharset,
michael@0 1148 IS_7BIT_NON_ASCII_CHARSET(aCharset),
michael@0 1149 true, 1, utf8Text);
michael@0 1150 }
michael@0 1151 PR_Free(decodedText);
michael@0 1152 if (NS_FAILED(rv)) {
michael@0 1153 return rv;
michael@0 1154 }
michael@0 1155 aResult.Append(utf8Text);
michael@0 1156
michael@0 1157 return NS_OK;
michael@0 1158 }
michael@0 1159
// Characters that terminate the charset scan in DecodeRFC2047Str() below:
// meeting any of them before the closing '?' makes the candidate
// encoded-word malformed.
michael@0 1160 static const char especials[] = "()<>@,;:\\\"/[]?.=";
michael@0 1161
michael@0 1162 // |decode_mime_part2_str| taken from comi18n.c
michael@0 1163 // Decode RFC2047-encoded words in the input and convert the result to UTF-8.
michael@0 1164 // If aOverrideCharset is true, charset in RFC2047-encoded words is
michael@0 1165 // ignored and aDefaultCharset is assumed, instead. aDefaultCharset
michael@0 1166 // is also used to convert raw octets (without RFC 2047 encoding) to UTF-8.
//
// The input is scanned for "=?charset?Q|B?text?=" sequences.  Text outside of
// them goes through CopyRawHeader(); payloads go through DecodeQOrBase64Str().
// Payloads of consecutive encoded-words that share charset and encoding are
// collected in |encodedText| and decoded together.  All TABs in the final
// result are replaced by spaces.
//
// The function always returns NS_OK: a malformed candidate is copied through
// literally (see |badsyntax|), and decode failures are handled locally.
michael@0 1167 //static
michael@0 1168 nsresult DecodeRFC2047Str(const char *aHeader, const char *aDefaultCharset,
michael@0 1169 bool aOverrideCharset, nsACString &aResult)
michael@0 1170 {
// p: start of the current "=?" candidate (later: just past it)
// q: scan cursor; ends up on the encoding letter
// r: the '?' that closes the payload
michael@0 1171 const char *p, *q = nullptr, *r;
michael@0 1172 const char *begin; // tracking pointer for where we are in the input buffer
// non-zero when the previous loop iteration consumed a valid encoded-word
michael@0 1173 int32_t isLastEncodedWord = 0;
michael@0 1174 const char *charsetStart, *charsetEnd;
michael@0 1175 nsAutoCString prevCharset, curCharset;
// payload(s) collected so far but not yet decoded
michael@0 1176 nsAutoCString encodedText;
michael@0 1177 char prevEncoding = '\0', curEncoding;
michael@0 1178 nsresult rv;
michael@0 1179
michael@0 1180 begin = aHeader;
michael@0 1181
michael@0 1182 // To avoid buffer realloc, if possible, set capacity in advance. No
michael@0 1183 // matter what, more than 3x expansion can never happen for all charsets
michael@0 1184 // supported by Mozilla. SCSU/BCSU with the sliding window set to a
michael@0 1185 // non-BMP block may be exceptions, but Mozilla does not support them.
michael@0 1186 // Neither any known mail/news program use them. Even if there's, we're
michael@0 1187 // safe because we don't use a raw *char any more.
michael@0 1188 aResult.SetCapacity(3 * strlen(aHeader));
michael@0 1189
michael@0 1190 while ((p = PL_strstr(begin, "=?")) != 0) {
michael@0 1191 if (isLastEncodedWord) {
michael@0 1192 // See if it's all whitespace.
michael@0 1193 for (q = begin; q < p; ++q) {
michael@0 1194 if (!PL_strchr(" \t\r\n", *q)) break;
michael@0 1195 }
michael@0 1196 }
michael@0 1197
// A gap consisting only of whitespace between two encoded-words is not
// copied to the output; any other gap first flushes the pending payload
// and is then copied.
michael@0 1198 if (!isLastEncodedWord || q < p) {
michael@0 1199 if (!encodedText.IsEmpty()) {
michael@0 1200 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0 1201 prevEncoding, prevCharset.get(), aResult);
michael@0 1202 if (NS_FAILED(rv)) {
michael@0 1203 aResult.Append(encodedText);
michael@0 1204 }
michael@0 1205 encodedText.Truncate();
michael@0 1206 prevCharset.Truncate();
michael@0 1207 prevEncoding = '\0';
michael@0 1208 }
michael@0 1209 // copy the part before the encoded-word
michael@0 1210 CopyRawHeader(begin, p - begin, aDefaultCharset, aResult);
michael@0 1211 begin = p;
michael@0 1212 }
michael@0 1213
michael@0 1214 p += 2;
michael@0 1215
michael@0 1216 // Get charset info
michael@0 1217 charsetStart = p;
michael@0 1218 charsetEnd = 0;
// The "<= ' '" test also stops the scan at the terminating NUL.
michael@0 1219 for (q = p; *q != '?'; q++) {
michael@0 1220 if (*q <= ' ' || PL_strchr(especials, *q)) {
michael@0 1221 goto badsyntax;
michael@0 1222 }
michael@0 1223
michael@0 1224 // RFC 2231 section 5
// (the charset name ends at the first '*'; what follows it up to the '?'
// is not used)
michael@0 1225 if (!charsetEnd && *q == '*') {
michael@0 1226 charsetEnd = q;
michael@0 1227 }
michael@0 1228 }
michael@0 1229 if (!charsetEnd) {
michael@0 1230 charsetEnd = q;
michael@0 1231 }
michael@0 1232
michael@0 1233 q++;
michael@0 1234 curEncoding = nsCRT::ToUpper(*q);
michael@0 1235 if (curEncoding != 'Q' && curEncoding != 'B')
michael@0 1236 goto badsyntax;
michael@0 1237
michael@0 1238 if (q[1] != '?')
michael@0 1239 goto badsyntax;
michael@0 1240
// Find the '?' that ends the payload; the "< ' '" test also catches NUL.
michael@0 1241 r = q;
michael@0 1242 for (r = q + 2; *r != '?'; r++) {
michael@0 1243 if (*r < ' ') goto badsyntax;
michael@0 1244 }
michael@0 1245 if (r[1] != '=')
michael@0 1246 goto badsyntax;
michael@0 1247 else if (r == q + 2) {
michael@0 1248 // it's empty, skip
michael@0 1249 begin = r + 2;
michael@0 1250 isLastEncodedWord = 1;
michael@0 1251 continue;
michael@0 1252 }
michael@0 1253
michael@0 1254 curCharset.Assign(charsetStart, charsetEnd - charsetStart);
michael@0 1255 // Override charset if requested. Never override labeled UTF-8.
michael@0 1256 // Use default charset instead of UNKNOWN-8BIT
michael@0 1257 if ((aOverrideCharset && 0 != nsCRT::strcasecmp(curCharset.get(), "UTF-8"))
michael@0 1258 || (aDefaultCharset && 0 == nsCRT::strcasecmp(curCharset.get(), "UNKNOWN-8BIT"))
michael@0 1259 ) {
michael@0 1260 curCharset = aDefaultCharset;
michael@0 1261 }
michael@0 1262
// R is the end of the payload that will actually be used; it equals r
// unless the B-encoding adjustment below applies.
michael@0 1263 const char *R;
michael@0 1264 R = r;
michael@0 1265 if (curEncoding == 'B') {
michael@0 1266 // bug 227290. ignore an extraneous '=' at the end.
michael@0 1267 // (# of characters in B-encoded part has to be a multiple of 4)
michael@0 1268 int32_t n = r - (q + 2);
michael@0 1269 R -= (n % 4 == 1 && !PL_strncmp(r - 3, "===", 3)) ? 1 : 0;
michael@0 1270 }
michael@0 1271 // Bug 493544. Don't decode the encoded text until it ends
// A payload that does not end in '=' and is compatible with what is
// already pending (or nothing is pending) is only collected here.
michael@0 1272 if (R[-1] != '='
michael@0 1273 && (prevCharset.IsEmpty()
michael@0 1274 || (curCharset == prevCharset && curEncoding == prevEncoding))
michael@0 1275 ) {
michael@0 1276 encodedText.Append(q + 2, R - (q + 2));
michael@0 1277 prevCharset = curCharset;
michael@0 1278 prevEncoding = curEncoding;
michael@0 1279
michael@0 1280 begin = r + 2;
michael@0 1281 isLastEncodedWord = 1;
michael@0 1282 continue;
michael@0 1283 }
michael@0 1284
michael@0 1285 bool bDecoded; // If the current line has been decoded.
michael@0 1286 bDecoded = false;
michael@0 1287 if (!encodedText.IsEmpty()) {
michael@0 1288 if (curCharset == prevCharset && curEncoding == prevEncoding) {
michael@0 1289 encodedText.Append(q + 2, R - (q + 2));
michael@0 1290 bDecoded = true;
michael@0 1291 }
michael@0 1292 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0 1293 prevEncoding, prevCharset.get(), aResult);
michael@0 1294 if (NS_FAILED(rv)) {
michael@0 1295 aResult.Append(encodedText);
michael@0 1296 }
michael@0 1297 encodedText.Truncate();
michael@0 1298 prevCharset.Truncate();
michael@0 1299 prevEncoding = '\0';
michael@0 1300 }
michael@0 1301 if (!bDecoded) {
michael@0 1302 rv = DecodeQOrBase64Str(q + 2, R - (q + 2), curEncoding,
michael@0 1303 curCharset.get(), aResult);
michael@0 1304 if (NS_FAILED(rv)) {
// NOTE(review): encodedText is always empty at this point (it was either
// empty above or has just been truncated), so this Append adds nothing and
// an undecodable word is dropped from the output.  Confirm whether the raw
// payload (q + 2 .. R) was meant to be appended instead.
michael@0 1305 aResult.Append(encodedText);
michael@0 1306 }
michael@0 1307 }
michael@0 1308
michael@0 1309 begin = r + 2;
michael@0 1310 isLastEncodedWord = 1;
michael@0 1311 continue;
michael@0 1312
// Reached via goto when the text after "=?" is not a well-formed
// encoded-word: flush what is pending and copy the input up to (and
// including) the "=?" literally, then resume scanning right after it.
// Unlike the other flush sites, prevEncoding is not reset here.
michael@0 1313 badsyntax:
michael@0 1314 if (!encodedText.IsEmpty()) {
michael@0 1315 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0 1316 prevEncoding, prevCharset.get(), aResult);
michael@0 1317 if (NS_FAILED(rv)) {
michael@0 1318 aResult.Append(encodedText);
michael@0 1319 }
michael@0 1320 encodedText.Truncate();
michael@0 1321 prevCharset.Truncate();
michael@0 1322 }
michael@0 1323 // copy the part before the encoded-word
michael@0 1324 aResult.Append(begin, p - begin);
michael@0 1325 begin = p;
michael@0 1326 isLastEncodedWord = 0;
michael@0 1327 }
michael@0 1328
// No further "=?" in the input: decode whatever is still pending.
michael@0 1329 if (!encodedText.IsEmpty()) {
michael@0 1330 rv = DecodeQOrBase64Str(encodedText.get(), encodedText.Length(),
michael@0 1331 prevEncoding, prevCharset.get(), aResult);
michael@0 1332 if (NS_FAILED(rv)) {
michael@0 1333 aResult.Append(encodedText);
michael@0 1334 }
michael@0 1335 }
michael@0 1336
michael@0 1337 // put the tail back
michael@0 1338 CopyRawHeader(begin, strlen(begin), aDefaultCharset, aResult);
michael@0 1339
// Replace every TAB in the result with a space.
michael@0 1340 nsAutoCString tempStr(aResult);
michael@0 1341 tempStr.ReplaceChar('\t', ' ');
michael@0 1342 aResult = tempStr;
michael@0 1343
michael@0 1344 return NS_OK;
michael@0 1345 }

mercurial