intl/uconv/src/nsTextToSubURI.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701 and complies with the disk-avoidance requirement documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

michael@0 1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5 #include "nsString.h"
michael@0 6 #include "nsIUnicodeEncoder.h"
michael@0 7 #include "nsICharsetConverterManager.h"
michael@0 8 #include "nsITextToSubURI.h"
michael@0 9 #include "nsEscape.h"
michael@0 10 #include "nsTextToSubURI.h"
michael@0 11 #include "nsCRT.h"
michael@0 12 #include "nsServiceManagerUtils.h"
michael@0 13
michael@0 14 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
michael@0 15
michael@0 16 nsTextToSubURI::nsTextToSubURI()
michael@0 17 {
michael@0 18 }
michael@0 19 nsTextToSubURI::~nsTextToSubURI()
michael@0 20 {
michael@0 21 }
michael@0 22
michael@0 23 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
michael@0 24
michael@0 25 NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape(
michael@0 26 const char *charset, const char16_t *text, char **_retval)
michael@0 27 {
michael@0 28 if(nullptr == _retval)
michael@0 29 return NS_ERROR_NULL_POINTER;
michael@0 30 *_retval = nullptr;
michael@0 31 nsresult rv = NS_OK;
michael@0 32
michael@0 33 // Get Charset, get the encoder.
michael@0 34 nsICharsetConverterManager *ccm;
michael@0 35 rv = CallGetService(kCharsetConverterManagerCID, &ccm);
michael@0 36 if(NS_SUCCEEDED(rv)) {
michael@0 37 nsIUnicodeEncoder *encoder;
michael@0 38 rv = ccm->GetUnicodeEncoder(charset, &encoder);
michael@0 39 NS_RELEASE(ccm);
michael@0 40 if (NS_SUCCEEDED(rv)) {
michael@0 41 rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?');
michael@0 42 if(NS_SUCCEEDED(rv))
michael@0 43 {
michael@0 44 char buf[256];
michael@0 45 char *pBuf = buf;
michael@0 46 int32_t ulen = text ? NS_strlen(text) : 0;
michael@0 47 int32_t outlen = 0;
michael@0 48 if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen)))
michael@0 49 {
michael@0 50 if(outlen >= 256) {
michael@0 51 pBuf = (char*)NS_Alloc(outlen+1);
michael@0 52 }
michael@0 53 if(nullptr == pBuf) {
michael@0 54 outlen = 255;
michael@0 55 pBuf = buf;
michael@0 56 }
michael@0 57 int32_t bufLen = outlen;
michael@0 58 if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {
michael@0 59 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
michael@0 60 int32_t finLen = bufLen - outlen;
michael@0 61 if (finLen > 0) {
michael@0 62 if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))
michael@0 63 outlen += finLen;
michael@0 64 }
michael@0 65 pBuf[outlen] = '\0';
michael@0 66 *_retval = nsEscape(pBuf, url_XPAlphas);
michael@0 67 if(nullptr == *_retval)
michael@0 68 rv = NS_ERROR_OUT_OF_MEMORY;
michael@0 69 }
michael@0 70 }
michael@0 71 if(pBuf != buf)
michael@0 72 NS_Free(pBuf);
michael@0 73 }
michael@0 74 NS_RELEASE(encoder);
michael@0 75 }
michael@0 76 }
michael@0 77
michael@0 78 return rv;
michael@0 79 }
michael@0 80
michael@0 81 NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(
michael@0 82 const char *charset, const char *text, char16_t **_retval)
michael@0 83 {
michael@0 84 if(nullptr == _retval)
michael@0 85 return NS_ERROR_NULL_POINTER;
michael@0 86 if(nullptr == text) {
michael@0 87 // set empty string instead of returning error
michael@0 88 // due to compatibility for old version
michael@0 89 text = "";
michael@0 90 }
michael@0 91 *_retval = nullptr;
michael@0 92 nsresult rv = NS_OK;
michael@0 93
michael@0 94 // unescape the string, unescape changes the input
michael@0 95 char *unescaped = NS_strdup(text);
michael@0 96 if (nullptr == unescaped)
michael@0 97 return NS_ERROR_OUT_OF_MEMORY;
michael@0 98 unescaped = nsUnescape(unescaped);
michael@0 99 NS_ASSERTION(unescaped, "nsUnescape returned null");
michael@0 100
michael@0 101 // Convert from the charset to unicode
michael@0 102 nsCOMPtr<nsICharsetConverterManager> ccm =
michael@0 103 do_GetService(kCharsetConverterManagerCID, &rv);
michael@0 104 if (NS_SUCCEEDED(rv)) {
michael@0 105 nsIUnicodeDecoder *decoder;
michael@0 106 rv = ccm->GetUnicodeDecoder(charset, &decoder);
michael@0 107 if (NS_SUCCEEDED(rv)) {
michael@0 108 char16_t *pBuf = nullptr;
michael@0 109 int32_t len = strlen(unescaped);
michael@0 110 int32_t outlen = 0;
michael@0 111 if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
michael@0 112 pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t));
michael@0 113 if (nullptr == pBuf)
michael@0 114 rv = NS_ERROR_OUT_OF_MEMORY;
michael@0 115 else {
michael@0 116 if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
michael@0 117 pBuf[outlen] = 0;
michael@0 118 *_retval = pBuf;
michael@0 119 }
michael@0 120 else
michael@0 121 NS_Free(pBuf);
michael@0 122 }
michael@0 123 }
michael@0 124 NS_RELEASE(decoder);
michael@0 125 }
michael@0 126 }
michael@0 127 NS_Free(unescaped);
michael@0 128
michael@0 129 return rv;
michael@0 130 }
michael@0 131
michael@0 132 static bool statefulCharset(const char *charset)
michael@0 133 {
michael@0 134 if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
michael@0 135 !nsCRT::strcasecmp(charset, "UTF-7") ||
michael@0 136 !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
michael@0 137 return true;
michael@0 138
michael@0 139 return false;
michael@0 140 }
michael@0 141
michael@0 142 nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
michael@0 143 const nsAFlatCString &aURI,
michael@0 144 bool aIRI,
michael@0 145 nsAString &_retval)
michael@0 146 {
michael@0 147 nsresult rv = NS_OK;
michael@0 148
michael@0 149 // check for 7bit encoding the data may not be ASCII after we decode
michael@0 150 bool isStatefulCharset = statefulCharset(aCharset.get());
michael@0 151
michael@0 152 if (!isStatefulCharset && IsASCII(aURI)) {
michael@0 153 CopyASCIItoUTF16(aURI, _retval);
michael@0 154 return rv;
michael@0 155 }
michael@0 156
michael@0 157 if (!isStatefulCharset && aIRI) {
michael@0 158 if (IsUTF8(aURI)) {
michael@0 159 CopyUTF8toUTF16(aURI, _retval);
michael@0 160 return rv;
michael@0 161 }
michael@0 162 }
michael@0 163
michael@0 164 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
michael@0 165 NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
michael@0 166
michael@0 167 nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;
michael@0 168
michael@0 169 charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
michael@0 170 NS_ENSURE_SUCCESS(rv, rv);
michael@0 171
michael@0 172 nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
michael@0 173 rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(),
michael@0 174 getter_AddRefs(unicodeDecoder));
michael@0 175 NS_ENSURE_SUCCESS(rv, rv);
michael@0 176 unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
michael@0 177
michael@0 178 int32_t srcLen = aURI.Length();
michael@0 179 int32_t dstLen;
michael@0 180 rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
michael@0 181 NS_ENSURE_SUCCESS(rv, rv);
michael@0 182
michael@0 183 char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t));
michael@0 184 NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
michael@0 185
michael@0 186 rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);
michael@0 187
michael@0 188 if (NS_SUCCEEDED(rv))
michael@0 189 _retval.Assign(ustr, dstLen);
michael@0 190
michael@0 191 NS_Free(ustr);
michael@0 192
michael@0 193 return rv;
michael@0 194 }
michael@0 195
michael@0 196 NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
michael@0 197 const nsACString &aURIFragment,
michael@0 198 nsAString &_retval)
michael@0 199 {
michael@0 200 nsAutoCString unescapedSpec;
michael@0 201 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
michael@0 202 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
michael@0 203 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
michael@0 204
michael@0 205 // in case of failure, return escaped URI
michael@0 206 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
michael@0 207 // sequences are also considered failure in this context
michael@0 208 if (convertURItoUnicode(
michael@0 209 PromiseFlatCString(aCharset), unescapedSpec, true, _retval)
michael@0 210 != NS_OK)
michael@0 211 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
michael@0 212 CopyUTF8toUTF16(aURIFragment, _retval);
michael@0 213 return NS_OK;
michael@0 214 }
michael@0 215
michael@0 216 NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset,
michael@0 217 const nsACString & aURIFragment,
michael@0 218 nsAString &_retval)
michael@0 219 {
michael@0 220 nsAutoCString unescapedSpec;
michael@0 221 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
michael@0 222 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
michael@0 223 // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
michael@0 224 // superset since converting "http:" with such an encoding is always a bad
michael@0 225 // idea.
michael@0 226 if (!IsUTF8(unescapedSpec) &&
michael@0 227 (aCharset.LowerCaseEqualsLiteral("utf-16") ||
michael@0 228 aCharset.LowerCaseEqualsLiteral("utf-16be") ||
michael@0 229 aCharset.LowerCaseEqualsLiteral("utf-16le") ||
michael@0 230 aCharset.LowerCaseEqualsLiteral("utf-7") ||
michael@0 231 aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
michael@0 232 CopyASCIItoUTF16(aURIFragment, _retval);
michael@0 233 return NS_OK;
michael@0 234 }
michael@0 235
michael@0 236 return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval);
michael@0 237 }
michael@0 238
michael@0 239 //----------------------------------------------------------------------

mercurial