intl/uconv/src/nsTextToSubURI.cpp

branch
TOR_BUG_9701
changeset 15
b8a032363ba2
equal deleted inserted replaced
-1:000000000000 0:450b3aec15af
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsString.h"
6 #include "nsIUnicodeEncoder.h"
7 #include "nsICharsetConverterManager.h"
8 #include "nsITextToSubURI.h"
9 #include "nsEscape.h"
10 #include "nsTextToSubURI.h"
11 #include "nsCRT.h"
12 #include "nsServiceManagerUtils.h"
13
// Class ID of the charset converter manager service. ConvertAndEscape() and
// UnEscapeAndConvert() below pass it to CallGetService()/do_GetService().
14 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
15
// Constructor: the body is empty, so no explicit initialisation happens here.
16 nsTextToSubURI::nsTextToSubURI()
17 {
18 }
// Destructor: the body is empty, so nothing is released explicitly here.
19 nsTextToSubURI::~nsTextToSubURI()
20 {
21 }
22
// Declares nsITextToSubURI as the interface implemented by nsTextToSubURI.
// NOTE(review): the macro is defined outside this file; presumably it expands
// to the nsISupports AddRef/Release/QueryInterface boilerplate -- confirm.
23 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
24
25 NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape(
26 const char *charset, const char16_t *text, char **_retval)
27 {
28 if(nullptr == _retval)
29 return NS_ERROR_NULL_POINTER;
30 *_retval = nullptr;
31 nsresult rv = NS_OK;
32
33 // Get Charset, get the encoder.
34 nsICharsetConverterManager *ccm;
35 rv = CallGetService(kCharsetConverterManagerCID, &ccm);
36 if(NS_SUCCEEDED(rv)) {
37 nsIUnicodeEncoder *encoder;
38 rv = ccm->GetUnicodeEncoder(charset, &encoder);
39 NS_RELEASE(ccm);
40 if (NS_SUCCEEDED(rv)) {
41 rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?');
42 if(NS_SUCCEEDED(rv))
43 {
44 char buf[256];
45 char *pBuf = buf;
46 int32_t ulen = text ? NS_strlen(text) : 0;
47 int32_t outlen = 0;
48 if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen)))
49 {
50 if(outlen >= 256) {
51 pBuf = (char*)NS_Alloc(outlen+1);
52 }
53 if(nullptr == pBuf) {
54 outlen = 255;
55 pBuf = buf;
56 }
57 int32_t bufLen = outlen;
58 if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) {
59 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary
60 int32_t finLen = bufLen - outlen;
61 if (finLen > 0) {
62 if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen)))
63 outlen += finLen;
64 }
65 pBuf[outlen] = '\0';
66 *_retval = nsEscape(pBuf, url_XPAlphas);
67 if(nullptr == *_retval)
68 rv = NS_ERROR_OUT_OF_MEMORY;
69 }
70 }
71 if(pBuf != buf)
72 NS_Free(pBuf);
73 }
74 NS_RELEASE(encoder);
75 }
76 }
77
78 return rv;
79 }
80
81 NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert(
82 const char *charset, const char *text, char16_t **_retval)
83 {
84 if(nullptr == _retval)
85 return NS_ERROR_NULL_POINTER;
86 if(nullptr == text) {
87 // set empty string instead of returning error
88 // due to compatibility for old version
89 text = "";
90 }
91 *_retval = nullptr;
92 nsresult rv = NS_OK;
93
94 // unescape the string, unescape changes the input
95 char *unescaped = NS_strdup(text);
96 if (nullptr == unescaped)
97 return NS_ERROR_OUT_OF_MEMORY;
98 unescaped = nsUnescape(unescaped);
99 NS_ASSERTION(unescaped, "nsUnescape returned null");
100
101 // Convert from the charset to unicode
102 nsCOMPtr<nsICharsetConverterManager> ccm =
103 do_GetService(kCharsetConverterManagerCID, &rv);
104 if (NS_SUCCEEDED(rv)) {
105 nsIUnicodeDecoder *decoder;
106 rv = ccm->GetUnicodeDecoder(charset, &decoder);
107 if (NS_SUCCEEDED(rv)) {
108 char16_t *pBuf = nullptr;
109 int32_t len = strlen(unescaped);
110 int32_t outlen = 0;
111 if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) {
112 pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t));
113 if (nullptr == pBuf)
114 rv = NS_ERROR_OUT_OF_MEMORY;
115 else {
116 if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) {
117 pBuf[outlen] = 0;
118 *_retval = pBuf;
119 }
120 else
121 NS_Free(pBuf);
122 }
123 }
124 NS_RELEASE(decoder);
125 }
126 }
127 NS_Free(unescaped);
128
129 return rv;
130 }
131
132 static bool statefulCharset(const char *charset)
133 {
134 if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
135 !nsCRT::strcasecmp(charset, "UTF-7") ||
136 !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
137 return true;
138
139 return false;
140 }
141
142 nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset,
143 const nsAFlatCString &aURI,
144 bool aIRI,
145 nsAString &_retval)
146 {
147 nsresult rv = NS_OK;
148
149 // check for 7bit encoding the data may not be ASCII after we decode
150 bool isStatefulCharset = statefulCharset(aCharset.get());
151
152 if (!isStatefulCharset && IsASCII(aURI)) {
153 CopyASCIItoUTF16(aURI, _retval);
154 return rv;
155 }
156
157 if (!isStatefulCharset && aIRI) {
158 if (IsUTF8(aURI)) {
159 CopyUTF8toUTF16(aURI, _retval);
160 return rv;
161 }
162 }
163
164 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
165 NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
166
167 nsCOMPtr<nsICharsetConverterManager> charsetConverterManager;
168
169 charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv);
170 NS_ENSURE_SUCCESS(rv, rv);
171
172 nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder;
173 rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(),
174 getter_AddRefs(unicodeDecoder));
175 NS_ENSURE_SUCCESS(rv, rv);
176 unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal);
177
178 int32_t srcLen = aURI.Length();
179 int32_t dstLen;
180 rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen);
181 NS_ENSURE_SUCCESS(rv, rv);
182
183 char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t));
184 NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY);
185
186 rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen);
187
188 if (NS_SUCCEEDED(rv))
189 _retval.Assign(ustr, dstLen);
190
191 NS_Free(ustr);
192
193 return rv;
194 }
195
196 NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
197 const nsACString &aURIFragment,
198 nsAString &_retval)
199 {
200 nsAutoCString unescapedSpec;
201 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
202 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
203 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
204
205 // in case of failure, return escaped URI
206 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
207 // sequences are also considered failure in this context
208 if (convertURItoUnicode(
209 PromiseFlatCString(aCharset), unescapedSpec, true, _retval)
210 != NS_OK)
211 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
212 CopyUTF8toUTF16(aURIFragment, _retval);
213 return NS_OK;
214 }
215
216 NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset,
217 const nsACString & aURIFragment,
218 nsAString &_retval)
219 {
220 nsAutoCString unescapedSpec;
221 NS_UnescapeURL(PromiseFlatCString(aURIFragment),
222 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
223 // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
224 // superset since converting "http:" with such an encoding is always a bad
225 // idea.
226 if (!IsUTF8(unescapedSpec) &&
227 (aCharset.LowerCaseEqualsLiteral("utf-16") ||
228 aCharset.LowerCaseEqualsLiteral("utf-16be") ||
229 aCharset.LowerCaseEqualsLiteral("utf-16le") ||
230 aCharset.LowerCaseEqualsLiteral("utf-7") ||
231 aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
232 CopyASCIItoUTF16(aURIFragment, _retval);
233 return NS_OK;
234 }
235
236 return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval);
237 }
238
239 //----------------------------------------------------------------------

mercurial