Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | #include "nsString.h" |
michael@0 | 6 | #include "nsIUnicodeEncoder.h" |
michael@0 | 7 | #include "nsICharsetConverterManager.h" |
michael@0 | 8 | #include "nsITextToSubURI.h" |
michael@0 | 9 | #include "nsEscape.h" |
michael@0 | 10 | #include "nsTextToSubURI.h" |
michael@0 | 11 | #include "nsCRT.h" |
michael@0 | 12 | #include "nsServiceManagerUtils.h" |
michael@0 | 13 | |
michael@0 | 14 | static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); |
michael@0 | 15 | |
michael@0 | 16 | nsTextToSubURI::nsTextToSubURI() |
michael@0 | 17 | { |
michael@0 | 18 | } |
michael@0 | 19 | nsTextToSubURI::~nsTextToSubURI() |
michael@0 | 20 | { |
michael@0 | 21 | } |
michael@0 | 22 | |
michael@0 | 23 | NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) |
michael@0 | 24 | |
michael@0 | 25 | NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape( |
michael@0 | 26 | const char *charset, const char16_t *text, char **_retval) |
michael@0 | 27 | { |
michael@0 | 28 | if(nullptr == _retval) |
michael@0 | 29 | return NS_ERROR_NULL_POINTER; |
michael@0 | 30 | *_retval = nullptr; |
michael@0 | 31 | nsresult rv = NS_OK; |
michael@0 | 32 | |
michael@0 | 33 | // Get Charset, get the encoder. |
michael@0 | 34 | nsICharsetConverterManager *ccm; |
michael@0 | 35 | rv = CallGetService(kCharsetConverterManagerCID, &ccm); |
michael@0 | 36 | if(NS_SUCCEEDED(rv)) { |
michael@0 | 37 | nsIUnicodeEncoder *encoder; |
michael@0 | 38 | rv = ccm->GetUnicodeEncoder(charset, &encoder); |
michael@0 | 39 | NS_RELEASE(ccm); |
michael@0 | 40 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 41 | rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?'); |
michael@0 | 42 | if(NS_SUCCEEDED(rv)) |
michael@0 | 43 | { |
michael@0 | 44 | char buf[256]; |
michael@0 | 45 | char *pBuf = buf; |
michael@0 | 46 | int32_t ulen = text ? NS_strlen(text) : 0; |
michael@0 | 47 | int32_t outlen = 0; |
michael@0 | 48 | if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) |
michael@0 | 49 | { |
michael@0 | 50 | if(outlen >= 256) { |
michael@0 | 51 | pBuf = (char*)NS_Alloc(outlen+1); |
michael@0 | 52 | } |
michael@0 | 53 | if(nullptr == pBuf) { |
michael@0 | 54 | outlen = 255; |
michael@0 | 55 | pBuf = buf; |
michael@0 | 56 | } |
michael@0 | 57 | int32_t bufLen = outlen; |
michael@0 | 58 | if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) { |
michael@0 | 59 | // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary |
michael@0 | 60 | int32_t finLen = bufLen - outlen; |
michael@0 | 61 | if (finLen > 0) { |
michael@0 | 62 | if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen))) |
michael@0 | 63 | outlen += finLen; |
michael@0 | 64 | } |
michael@0 | 65 | pBuf[outlen] = '\0'; |
michael@0 | 66 | *_retval = nsEscape(pBuf, url_XPAlphas); |
michael@0 | 67 | if(nullptr == *_retval) |
michael@0 | 68 | rv = NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 69 | } |
michael@0 | 70 | } |
michael@0 | 71 | if(pBuf != buf) |
michael@0 | 72 | NS_Free(pBuf); |
michael@0 | 73 | } |
michael@0 | 74 | NS_RELEASE(encoder); |
michael@0 | 75 | } |
michael@0 | 76 | } |
michael@0 | 77 | |
michael@0 | 78 | return rv; |
michael@0 | 79 | } |
michael@0 | 80 | |
michael@0 | 81 | NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert( |
michael@0 | 82 | const char *charset, const char *text, char16_t **_retval) |
michael@0 | 83 | { |
michael@0 | 84 | if(nullptr == _retval) |
michael@0 | 85 | return NS_ERROR_NULL_POINTER; |
michael@0 | 86 | if(nullptr == text) { |
michael@0 | 87 | // set empty string instead of returning error |
michael@0 | 88 | // due to compatibility for old version |
michael@0 | 89 | text = ""; |
michael@0 | 90 | } |
michael@0 | 91 | *_retval = nullptr; |
michael@0 | 92 | nsresult rv = NS_OK; |
michael@0 | 93 | |
michael@0 | 94 | // unescape the string, unescape changes the input |
michael@0 | 95 | char *unescaped = NS_strdup(text); |
michael@0 | 96 | if (nullptr == unescaped) |
michael@0 | 97 | return NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 98 | unescaped = nsUnescape(unescaped); |
michael@0 | 99 | NS_ASSERTION(unescaped, "nsUnescape returned null"); |
michael@0 | 100 | |
michael@0 | 101 | // Convert from the charset to unicode |
michael@0 | 102 | nsCOMPtr<nsICharsetConverterManager> ccm = |
michael@0 | 103 | do_GetService(kCharsetConverterManagerCID, &rv); |
michael@0 | 104 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 105 | nsIUnicodeDecoder *decoder; |
michael@0 | 106 | rv = ccm->GetUnicodeDecoder(charset, &decoder); |
michael@0 | 107 | if (NS_SUCCEEDED(rv)) { |
michael@0 | 108 | char16_t *pBuf = nullptr; |
michael@0 | 109 | int32_t len = strlen(unescaped); |
michael@0 | 110 | int32_t outlen = 0; |
michael@0 | 111 | if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) { |
michael@0 | 112 | pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t)); |
michael@0 | 113 | if (nullptr == pBuf) |
michael@0 | 114 | rv = NS_ERROR_OUT_OF_MEMORY; |
michael@0 | 115 | else { |
michael@0 | 116 | if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) { |
michael@0 | 117 | pBuf[outlen] = 0; |
michael@0 | 118 | *_retval = pBuf; |
michael@0 | 119 | } |
michael@0 | 120 | else |
michael@0 | 121 | NS_Free(pBuf); |
michael@0 | 122 | } |
michael@0 | 123 | } |
michael@0 | 124 | NS_RELEASE(decoder); |
michael@0 | 125 | } |
michael@0 | 126 | } |
michael@0 | 127 | NS_Free(unescaped); |
michael@0 | 128 | |
michael@0 | 129 | return rv; |
michael@0 | 130 | } |
michael@0 | 131 | |
michael@0 | 132 | static bool statefulCharset(const char *charset) |
michael@0 | 133 | { |
michael@0 | 134 | if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) || |
michael@0 | 135 | !nsCRT::strcasecmp(charset, "UTF-7") || |
michael@0 | 136 | !nsCRT::strcasecmp(charset, "HZ-GB-2312")) |
michael@0 | 137 | return true; |
michael@0 | 138 | |
michael@0 | 139 | return false; |
michael@0 | 140 | } |
michael@0 | 141 | |
michael@0 | 142 | nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset, |
michael@0 | 143 | const nsAFlatCString &aURI, |
michael@0 | 144 | bool aIRI, |
michael@0 | 145 | nsAString &_retval) |
michael@0 | 146 | { |
michael@0 | 147 | nsresult rv = NS_OK; |
michael@0 | 148 | |
michael@0 | 149 | // check for 7bit encoding the data may not be ASCII after we decode |
michael@0 | 150 | bool isStatefulCharset = statefulCharset(aCharset.get()); |
michael@0 | 151 | |
michael@0 | 152 | if (!isStatefulCharset && IsASCII(aURI)) { |
michael@0 | 153 | CopyASCIItoUTF16(aURI, _retval); |
michael@0 | 154 | return rv; |
michael@0 | 155 | } |
michael@0 | 156 | |
michael@0 | 157 | if (!isStatefulCharset && aIRI) { |
michael@0 | 158 | if (IsUTF8(aURI)) { |
michael@0 | 159 | CopyUTF8toUTF16(aURI, _retval); |
michael@0 | 160 | return rv; |
michael@0 | 161 | } |
michael@0 | 162 | } |
michael@0 | 163 | |
michael@0 | 164 | // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. |
michael@0 | 165 | NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); |
michael@0 | 166 | |
michael@0 | 167 | nsCOMPtr<nsICharsetConverterManager> charsetConverterManager; |
michael@0 | 168 | |
michael@0 | 169 | charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); |
michael@0 | 170 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 171 | |
michael@0 | 172 | nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder; |
michael@0 | 173 | rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), |
michael@0 | 174 | getter_AddRefs(unicodeDecoder)); |
michael@0 | 175 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 176 | unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); |
michael@0 | 177 | |
michael@0 | 178 | int32_t srcLen = aURI.Length(); |
michael@0 | 179 | int32_t dstLen; |
michael@0 | 180 | rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen); |
michael@0 | 181 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 182 | |
michael@0 | 183 | char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t)); |
michael@0 | 184 | NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 185 | |
michael@0 | 186 | rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen); |
michael@0 | 187 | |
michael@0 | 188 | if (NS_SUCCEEDED(rv)) |
michael@0 | 189 | _retval.Assign(ustr, dstLen); |
michael@0 | 190 | |
michael@0 | 191 | NS_Free(ustr); |
michael@0 | 192 | |
michael@0 | 193 | return rv; |
michael@0 | 194 | } |
michael@0 | 195 | |
michael@0 | 196 | NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, |
michael@0 | 197 | const nsACString &aURIFragment, |
michael@0 | 198 | nsAString &_retval) |
michael@0 | 199 | { |
michael@0 | 200 | nsAutoCString unescapedSpec; |
michael@0 | 201 | // skip control octets (0x00 - 0x1f and 0x7f) when unescaping |
michael@0 | 202 | NS_UnescapeURL(PromiseFlatCString(aURIFragment), |
michael@0 | 203 | esc_SkipControl | esc_AlwaysCopy, unescapedSpec); |
michael@0 | 204 | |
michael@0 | 205 | // in case of failure, return escaped URI |
michael@0 | 206 | // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte |
michael@0 | 207 | // sequences are also considered failure in this context |
michael@0 | 208 | if (convertURItoUnicode( |
michael@0 | 209 | PromiseFlatCString(aCharset), unescapedSpec, true, _retval) |
michael@0 | 210 | != NS_OK) |
michael@0 | 211 | // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 |
michael@0 | 212 | CopyUTF8toUTF16(aURIFragment, _retval); |
michael@0 | 213 | return NS_OK; |
michael@0 | 214 | } |
michael@0 | 215 | |
michael@0 | 216 | NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, |
michael@0 | 217 | const nsACString & aURIFragment, |
michael@0 | 218 | nsAString &_retval) |
michael@0 | 219 | { |
michael@0 | 220 | nsAutoCString unescapedSpec; |
michael@0 | 221 | NS_UnescapeURL(PromiseFlatCString(aURIFragment), |
michael@0 | 222 | esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); |
michael@0 | 223 | // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII |
michael@0 | 224 | // superset since converting "http:" with such an encoding is always a bad |
michael@0 | 225 | // idea. |
michael@0 | 226 | if (!IsUTF8(unescapedSpec) && |
michael@0 | 227 | (aCharset.LowerCaseEqualsLiteral("utf-16") || |
michael@0 | 228 | aCharset.LowerCaseEqualsLiteral("utf-16be") || |
michael@0 | 229 | aCharset.LowerCaseEqualsLiteral("utf-16le") || |
michael@0 | 230 | aCharset.LowerCaseEqualsLiteral("utf-7") || |
michael@0 | 231 | aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){ |
michael@0 | 232 | CopyASCIItoUTF16(aURIFragment, _retval); |
michael@0 | 233 | return NS_OK; |
michael@0 | 234 | } |
michael@0 | 235 | |
michael@0 | 236 | return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval); |
michael@0 | 237 | } |
michael@0 | 238 | |
michael@0 | 239 | //---------------------------------------------------------------------- |