intl/uconv/src/nsUTF8ConverterService.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk-avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim:expandtab:shiftwidth=2:tabstop=4:
michael@0 3 */
michael@0 4 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 5 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 6 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 7 #include "nsString.h"
michael@0 8 #include "nsICharsetConverterManager.h"
michael@0 9 #include "nsUTF8ConverterService.h"
michael@0 10 #include "nsEscape.h"
michael@0 11 #include "nsAutoPtr.h"
michael@0 12 #include "nsServiceManagerUtils.h"
michael@0 13
// NOTE(review): presumably expands to the nsISupports boilerplate for
// nsUTF8ConverterService with nsIUTF8ConverterService as its only exposed
// interface -- confirm against the macro's definition.
michael@0 14 NS_IMPL_ISUPPORTS(nsUTF8ConverterService, nsIUTF8ConverterService)
michael@0 15
michael@0 16 static nsresult
michael@0 17 ToUTF8(const nsACString &aString, const char *aCharset,
michael@0 18 bool aAllowSubstitution, nsACString &aResult)
michael@0 19 {
michael@0 20 nsresult rv;
michael@0 21 if (!aCharset || !*aCharset)
michael@0 22 return NS_ERROR_INVALID_ARG;
michael@0 23
michael@0 24 nsCOMPtr<nsICharsetConverterManager> ccm;
michael@0 25
michael@0 26 ccm = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
michael@0 27 NS_ENSURE_SUCCESS(rv, rv);
michael@0 28
michael@0 29 nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
michael@0 30 rv = ccm->GetUnicodeDecoder(aCharset,
michael@0 31 getter_AddRefs(unicodeDecoder));
michael@0 32 NS_ENSURE_SUCCESS(rv, rv);
michael@0 33
michael@0 34 if (!aAllowSubstitution)
michael@0 35 unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
michael@0 36
michael@0 37 int32_t srcLen = aString.Length();
michael@0 38 int32_t dstLen;
michael@0 39 const nsAFlatCString& inStr = PromiseFlatCString(aString);
michael@0 40 rv = unicodeDecoder->GetMaxLength(inStr.get(), srcLen, &dstLen);
michael@0 41 NS_ENSURE_SUCCESS(rv, rv);
michael@0 42
michael@0 43 nsAutoArrayPtr<char16_t> ustr(new char16_t[dstLen]);
michael@0 44 NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
michael@0 45
michael@0 46 rv = unicodeDecoder->Convert(inStr.get(), &srcLen, ustr, &dstLen);
michael@0 47 if (NS_SUCCEEDED(rv)){
michael@0 48 // Tru64 Cxx needs an explicit get()
michael@0 49 CopyUTF16toUTF8(Substring(ustr.get(), ustr + dstLen), aResult);
michael@0 50 }
michael@0 51 return rv;
michael@0 52 }
michael@0 53
michael@0 54 NS_IMETHODIMP
michael@0 55 nsUTF8ConverterService::ConvertStringToUTF8(const nsACString &aString,
michael@0 56 const char *aCharset,
michael@0 57 bool aSkipCheck,
michael@0 58 bool aAllowSubstitution,
michael@0 59 uint8_t aOptionalArgc,
michael@0 60 nsACString &aUTF8String)
michael@0 61 {
michael@0 62 bool allowSubstitution = (aOptionalArgc == 1) ? aAllowSubstitution : true;
michael@0 63
michael@0 64 // return if ASCII only or valid UTF-8 providing that the ASCII/UTF-8
michael@0 65 // check is requested. It may not be asked for if a caller suspects
michael@0 66 // that the input is in non-ASCII 7bit charset (ISO-2022-xx, HZ) or
michael@0 67 // it's in a charset other than UTF-8 that can be mistaken for UTF-8.
michael@0 68 if (!aSkipCheck && (IsASCII(aString) || IsUTF8(aString))) {
michael@0 69 aUTF8String = aString;
michael@0 70 return NS_OK;
michael@0 71 }
michael@0 72
michael@0 73 aUTF8String.Truncate();
michael@0 74
michael@0 75 nsresult rv = ToUTF8(aString, aCharset, allowSubstitution, aUTF8String);
michael@0 76
michael@0 77 // additional protection for cases where check is skipped and the input
michael@0 78 // is actually in UTF-8 as opposed to aCharset. (i.e. caller's hunch
michael@0 79 // was wrong.) We don't check ASCIIness assuming there's no charset
michael@0 80 // incompatible with ASCII (we don't support EBCDIC).
michael@0 81 if (aSkipCheck && NS_FAILED(rv) && IsUTF8(aString)) {
michael@0 82 aUTF8String = aString;
michael@0 83 return NS_OK;
michael@0 84 }
michael@0 85
michael@0 86 return rv;
michael@0 87 }
michael@0 88
michael@0 89 NS_IMETHODIMP
michael@0 90 nsUTF8ConverterService::ConvertURISpecToUTF8(const nsACString &aSpec,
michael@0 91 const char *aCharset,
michael@0 92 nsACString &aUTF8Spec)
michael@0 93 {
michael@0 94 // assume UTF-8 if the spec contains unescaped non-ASCII characters.
michael@0 95 // No valid spec in Mozilla would break this assumption.
michael@0 96 if (!IsASCII(aSpec)) {
michael@0 97 aUTF8Spec = aSpec;
michael@0 98 return NS_OK;
michael@0 99 }
michael@0 100
michael@0 101 aUTF8Spec.Truncate();
michael@0 102
michael@0 103 nsAutoCString unescapedSpec;
michael@0 104 // NS_UnescapeURL does not fill up unescapedSpec unless there's at least
michael@0 105 // one character to unescape.
michael@0 106 bool written = NS_UnescapeURL(PromiseFlatCString(aSpec).get(), aSpec.Length(),
michael@0 107 esc_OnlyNonASCII, unescapedSpec);
michael@0 108
michael@0 109 if (!written) {
michael@0 110 aUTF8Spec = aSpec;
michael@0 111 return NS_OK;
michael@0 112 }
michael@0 113 // return if ASCII only or escaped UTF-8
michael@0 114 if (IsASCII(unescapedSpec) || IsUTF8(unescapedSpec)) {
michael@0 115 aUTF8Spec = unescapedSpec;
michael@0 116 return NS_OK;
michael@0 117 }
michael@0 118
michael@0 119 return ToUTF8(unescapedSpec, aCharset, true, aUTF8Spec);
michael@0 120 }
michael@0 121

mercurial