1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/intl/uconv/src/nsTextToSubURI.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,239 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 +#include "nsString.h" 1.9 +#include "nsIUnicodeEncoder.h" 1.10 +#include "nsICharsetConverterManager.h" 1.11 +#include "nsITextToSubURI.h" 1.12 +#include "nsEscape.h" 1.13 +#include "nsTextToSubURI.h" 1.14 +#include "nsCRT.h" 1.15 +#include "nsServiceManagerUtils.h" 1.16 + 1.17 +static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); 1.18 + 1.19 +nsTextToSubURI::nsTextToSubURI() 1.20 +{ 1.21 +} 1.22 +nsTextToSubURI::~nsTextToSubURI() 1.23 +{ 1.24 +} 1.25 + 1.26 +NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) 1.27 + 1.28 +NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape( 1.29 + const char *charset, const char16_t *text, char **_retval) 1.30 +{ 1.31 + if(nullptr == _retval) 1.32 + return NS_ERROR_NULL_POINTER; 1.33 + *_retval = nullptr; 1.34 + nsresult rv = NS_OK; 1.35 + 1.36 + // Get Charset, get the encoder. 1.37 + nsICharsetConverterManager *ccm; 1.38 + rv = CallGetService(kCharsetConverterManagerCID, &ccm); 1.39 + if(NS_SUCCEEDED(rv)) { 1.40 + nsIUnicodeEncoder *encoder; 1.41 + rv = ccm->GetUnicodeEncoder(charset, &encoder); 1.42 + NS_RELEASE(ccm); 1.43 + if (NS_SUCCEEDED(rv)) { 1.44 + rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?'); 1.45 + if(NS_SUCCEEDED(rv)) 1.46 + { 1.47 + char buf[256]; 1.48 + char *pBuf = buf; 1.49 + int32_t ulen = text ? NS_strlen(text) : 0; 1.50 + int32_t outlen = 0; 1.51 + if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) 1.52 + { 1.53 + if(outlen >= 256) { 1.54 + pBuf = (char*)NS_Alloc(outlen+1); 1.55 + } 1.56 + if(nullptr == pBuf) { 1.57 + outlen = 255; 1.58 + pBuf = buf; 1.59 + } 1.60 + int32_t bufLen = outlen; 1.61 + if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) { 1.62 + // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary 1.63 + int32_t finLen = bufLen - outlen; 1.64 + if (finLen > 0) { 1.65 + if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen))) 1.66 + outlen += finLen; 1.67 + } 1.68 + pBuf[outlen] = '\0'; 1.69 + *_retval = nsEscape(pBuf, url_XPAlphas); 1.70 + if(nullptr == *_retval) 1.71 + rv = NS_ERROR_OUT_OF_MEMORY; 1.72 + } 1.73 + } 1.74 + if(pBuf != buf) 1.75 + NS_Free(pBuf); 1.76 + } 1.77 + NS_RELEASE(encoder); 1.78 + } 1.79 + } 1.80 + 1.81 + return rv; 1.82 +} 1.83 + 1.84 +NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert( 1.85 + const char *charset, const char *text, char16_t **_retval) 1.86 +{ 1.87 + if(nullptr == _retval) 1.88 + return NS_ERROR_NULL_POINTER; 1.89 + if(nullptr == text) { 1.90 + // set empty string instead of returning error 1.91 + // due to compatibility for old version 1.92 + text = ""; 1.93 + } 1.94 + *_retval = nullptr; 1.95 + nsresult rv = NS_OK; 1.96 + 1.97 + // unescape the string, unescape changes the input 1.98 + char *unescaped = NS_strdup(text); 1.99 + if (nullptr == unescaped) 1.100 + return NS_ERROR_OUT_OF_MEMORY; 1.101 + unescaped = nsUnescape(unescaped); 1.102 + NS_ASSERTION(unescaped, "nsUnescape returned null"); 1.103 + 1.104 + // Convert from the charset to unicode 1.105 + nsCOMPtr<nsICharsetConverterManager> ccm = 1.106 + do_GetService(kCharsetConverterManagerCID, &rv); 1.107 + if (NS_SUCCEEDED(rv)) { 1.108 + nsIUnicodeDecoder *decoder; 1.109 + rv = ccm->GetUnicodeDecoder(charset, &decoder); 1.110 + if (NS_SUCCEEDED(rv)) { 1.111 + char16_t *pBuf = nullptr; 1.112 + int32_t len = strlen(unescaped); 1.113 + int32_t outlen = 0; 1.114 + if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) { 1.115 + pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t)); 1.116 + if (nullptr == pBuf) 1.117 + rv = NS_ERROR_OUT_OF_MEMORY; 1.118 + else { 1.119 + if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) { 1.120 + pBuf[outlen] = 0; 1.121 + *_retval = pBuf; 1.122 + } 1.123 + else 1.124 + NS_Free(pBuf); 1.125 + } 1.126 + } 1.127 + NS_RELEASE(decoder); 1.128 + } 1.129 + } 1.130 + NS_Free(unescaped); 1.131 + 1.132 + return rv; 1.133 +} 1.134 + 1.135 +static bool statefulCharset(const char *charset) 1.136 +{ 1.137 + if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) || 1.138 + !nsCRT::strcasecmp(charset, "UTF-7") || 1.139 + !nsCRT::strcasecmp(charset, "HZ-GB-2312")) 1.140 + return true; 1.141 + 1.142 + return false; 1.143 +} 1.144 + 1.145 +nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset, 1.146 + const nsAFlatCString &aURI, 1.147 + bool aIRI, 1.148 + nsAString &_retval) 1.149 +{ 1.150 + nsresult rv = NS_OK; 1.151 + 1.152 + // check for 7bit encoding the data may not be ASCII after we decode 1.153 + bool isStatefulCharset = statefulCharset(aCharset.get()); 1.154 + 1.155 + if (!isStatefulCharset && IsASCII(aURI)) { 1.156 + CopyASCIItoUTF16(aURI, _retval); 1.157 + return rv; 1.158 + } 1.159 + 1.160 + if (!isStatefulCharset && aIRI) { 1.161 + if (IsUTF8(aURI)) { 1.162 + CopyUTF8toUTF16(aURI, _retval); 1.163 + return rv; 1.164 + } 1.165 + } 1.166 + 1.167 + // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. 1.168 + NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); 1.169 + 1.170 + nsCOMPtr<nsICharsetConverterManager> charsetConverterManager; 1.171 + 1.172 + charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); 1.173 + NS_ENSURE_SUCCESS(rv, rv); 1.174 + 1.175 + nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder; 1.176 + rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), 1.177 + getter_AddRefs(unicodeDecoder)); 1.178 + NS_ENSURE_SUCCESS(rv, rv); 1.179 + unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); 1.180 + 1.181 + int32_t srcLen = aURI.Length(); 1.182 + int32_t dstLen; 1.183 + rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen); 1.184 + NS_ENSURE_SUCCESS(rv, rv); 1.185 + 1.186 + char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t)); 1.187 + NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); 1.188 + 1.189 + rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen); 1.190 + 1.191 + if (NS_SUCCEEDED(rv)) 1.192 + _retval.Assign(ustr, dstLen); 1.193 + 1.194 + NS_Free(ustr); 1.195 + 1.196 + return rv; 1.197 +} 1.198 + 1.199 +NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, 1.200 + const nsACString &aURIFragment, 1.201 + nsAString &_retval) 1.202 +{ 1.203 + nsAutoCString unescapedSpec; 1.204 + // skip control octets (0x00 - 0x1f and 0x7f) when unescaping 1.205 + NS_UnescapeURL(PromiseFlatCString(aURIFragment), 1.206 + esc_SkipControl | esc_AlwaysCopy, unescapedSpec); 1.207 + 1.208 + // in case of failure, return escaped URI 1.209 + // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte 1.210 + // sequences are also considered failure in this context 1.211 + if (convertURItoUnicode( 1.212 + PromiseFlatCString(aCharset), unescapedSpec, true, _retval) 1.213 + != NS_OK) 1.214 + // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 1.215 + CopyUTF8toUTF16(aURIFragment, _retval); 1.216 + return NS_OK; 1.217 +} 1.218 + 1.219 +NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, 1.220 + const nsACString & aURIFragment, 1.221 + nsAString &_retval) 1.222 +{ 1.223 + nsAutoCString unescapedSpec; 1.224 + NS_UnescapeURL(PromiseFlatCString(aURIFragment), 1.225 + esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); 1.226 + // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII 1.227 + // superset since converting "http:" with such an encoding is always a bad 1.228 + // idea. 1.229 + if (!IsUTF8(unescapedSpec) && 1.230 + (aCharset.LowerCaseEqualsLiteral("utf-16") || 1.231 + aCharset.LowerCaseEqualsLiteral("utf-16be") || 1.232 + aCharset.LowerCaseEqualsLiteral("utf-16le") || 1.233 + aCharset.LowerCaseEqualsLiteral("utf-7") || 1.234 + aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){ 1.235 + CopyASCIItoUTF16(aURIFragment, _retval); 1.236 + return NS_OK; 1.237 + } 1.238 + 1.239 + return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval); 1.240 +} 1.241 + 1.242 +//----------------------------------------------------------------------