|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
5 #include "nsString.h" |
|
6 #include "nsIUnicodeEncoder.h" |
|
7 #include "nsICharsetConverterManager.h" |
|
8 #include "nsITextToSubURI.h" |
|
9 #include "nsEscape.h" |
|
10 #include "nsTextToSubURI.h" |
|
11 #include "nsCRT.h" |
|
12 #include "nsServiceManagerUtils.h" |
|
13 |
|
14 static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID); |
|
15 |
|
16 nsTextToSubURI::nsTextToSubURI() |
|
17 { |
|
18 } |
|
19 nsTextToSubURI::~nsTextToSubURI() |
|
20 { |
|
21 } |
|
22 |
|
23 NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) |
|
24 |
|
25 NS_IMETHODIMP nsTextToSubURI::ConvertAndEscape( |
|
26 const char *charset, const char16_t *text, char **_retval) |
|
27 { |
|
28 if(nullptr == _retval) |
|
29 return NS_ERROR_NULL_POINTER; |
|
30 *_retval = nullptr; |
|
31 nsresult rv = NS_OK; |
|
32 |
|
33 // Get Charset, get the encoder. |
|
34 nsICharsetConverterManager *ccm; |
|
35 rv = CallGetService(kCharsetConverterManagerCID, &ccm); |
|
36 if(NS_SUCCEEDED(rv)) { |
|
37 nsIUnicodeEncoder *encoder; |
|
38 rv = ccm->GetUnicodeEncoder(charset, &encoder); |
|
39 NS_RELEASE(ccm); |
|
40 if (NS_SUCCEEDED(rv)) { |
|
41 rv = encoder->SetOutputErrorBehavior(nsIUnicodeEncoder::kOnError_Replace, nullptr, (char16_t)'?'); |
|
42 if(NS_SUCCEEDED(rv)) |
|
43 { |
|
44 char buf[256]; |
|
45 char *pBuf = buf; |
|
46 int32_t ulen = text ? NS_strlen(text) : 0; |
|
47 int32_t outlen = 0; |
|
48 if(NS_SUCCEEDED(rv = encoder->GetMaxLength(text, ulen, &outlen))) |
|
49 { |
|
50 if(outlen >= 256) { |
|
51 pBuf = (char*)NS_Alloc(outlen+1); |
|
52 } |
|
53 if(nullptr == pBuf) { |
|
54 outlen = 255; |
|
55 pBuf = buf; |
|
56 } |
|
57 int32_t bufLen = outlen; |
|
58 if(NS_SUCCEEDED(rv = encoder->Convert(text,&ulen, pBuf, &outlen))) { |
|
59 // put termination characters (e.g. ESC(B of ISO-2022-JP) if necessary |
|
60 int32_t finLen = bufLen - outlen; |
|
61 if (finLen > 0) { |
|
62 if (NS_SUCCEEDED(encoder->Finish((char *)(pBuf+outlen), &finLen))) |
|
63 outlen += finLen; |
|
64 } |
|
65 pBuf[outlen] = '\0'; |
|
66 *_retval = nsEscape(pBuf, url_XPAlphas); |
|
67 if(nullptr == *_retval) |
|
68 rv = NS_ERROR_OUT_OF_MEMORY; |
|
69 } |
|
70 } |
|
71 if(pBuf != buf) |
|
72 NS_Free(pBuf); |
|
73 } |
|
74 NS_RELEASE(encoder); |
|
75 } |
|
76 } |
|
77 |
|
78 return rv; |
|
79 } |
|
80 |
|
81 NS_IMETHODIMP nsTextToSubURI::UnEscapeAndConvert( |
|
82 const char *charset, const char *text, char16_t **_retval) |
|
83 { |
|
84 if(nullptr == _retval) |
|
85 return NS_ERROR_NULL_POINTER; |
|
86 if(nullptr == text) { |
|
87 // set empty string instead of returning error |
|
88 // due to compatibility for old version |
|
89 text = ""; |
|
90 } |
|
91 *_retval = nullptr; |
|
92 nsresult rv = NS_OK; |
|
93 |
|
94 // unescape the string, unescape changes the input |
|
95 char *unescaped = NS_strdup(text); |
|
96 if (nullptr == unescaped) |
|
97 return NS_ERROR_OUT_OF_MEMORY; |
|
98 unescaped = nsUnescape(unescaped); |
|
99 NS_ASSERTION(unescaped, "nsUnescape returned null"); |
|
100 |
|
101 // Convert from the charset to unicode |
|
102 nsCOMPtr<nsICharsetConverterManager> ccm = |
|
103 do_GetService(kCharsetConverterManagerCID, &rv); |
|
104 if (NS_SUCCEEDED(rv)) { |
|
105 nsIUnicodeDecoder *decoder; |
|
106 rv = ccm->GetUnicodeDecoder(charset, &decoder); |
|
107 if (NS_SUCCEEDED(rv)) { |
|
108 char16_t *pBuf = nullptr; |
|
109 int32_t len = strlen(unescaped); |
|
110 int32_t outlen = 0; |
|
111 if (NS_SUCCEEDED(rv = decoder->GetMaxLength(unescaped, len, &outlen))) { |
|
112 pBuf = (char16_t *) NS_Alloc((outlen+1)*sizeof(char16_t)); |
|
113 if (nullptr == pBuf) |
|
114 rv = NS_ERROR_OUT_OF_MEMORY; |
|
115 else { |
|
116 if (NS_SUCCEEDED(rv = decoder->Convert(unescaped, &len, pBuf, &outlen))) { |
|
117 pBuf[outlen] = 0; |
|
118 *_retval = pBuf; |
|
119 } |
|
120 else |
|
121 NS_Free(pBuf); |
|
122 } |
|
123 } |
|
124 NS_RELEASE(decoder); |
|
125 } |
|
126 } |
|
127 NS_Free(unescaped); |
|
128 |
|
129 return rv; |
|
130 } |
|
131 |
|
132 static bool statefulCharset(const char *charset) |
|
133 { |
|
134 if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) || |
|
135 !nsCRT::strcasecmp(charset, "UTF-7") || |
|
136 !nsCRT::strcasecmp(charset, "HZ-GB-2312")) |
|
137 return true; |
|
138 |
|
139 return false; |
|
140 } |
|
141 |
|
142 nsresult nsTextToSubURI::convertURItoUnicode(const nsAFlatCString &aCharset, |
|
143 const nsAFlatCString &aURI, |
|
144 bool aIRI, |
|
145 nsAString &_retval) |
|
146 { |
|
147 nsresult rv = NS_OK; |
|
148 |
|
149 // check for 7bit encoding the data may not be ASCII after we decode |
|
150 bool isStatefulCharset = statefulCharset(aCharset.get()); |
|
151 |
|
152 if (!isStatefulCharset && IsASCII(aURI)) { |
|
153 CopyASCIItoUTF16(aURI, _retval); |
|
154 return rv; |
|
155 } |
|
156 |
|
157 if (!isStatefulCharset && aIRI) { |
|
158 if (IsUTF8(aURI)) { |
|
159 CopyUTF8toUTF16(aURI, _retval); |
|
160 return rv; |
|
161 } |
|
162 } |
|
163 |
|
164 // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. |
|
165 NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); |
|
166 |
|
167 nsCOMPtr<nsICharsetConverterManager> charsetConverterManager; |
|
168 |
|
169 charsetConverterManager = do_GetService(NS_CHARSETCONVERTERMANAGER_CONTRACTID, &rv); |
|
170 NS_ENSURE_SUCCESS(rv, rv); |
|
171 |
|
172 nsCOMPtr<nsIUnicodeDecoder> unicodeDecoder; |
|
173 rv = charsetConverterManager->GetUnicodeDecoder(aCharset.get(), |
|
174 getter_AddRefs(unicodeDecoder)); |
|
175 NS_ENSURE_SUCCESS(rv, rv); |
|
176 unicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Signal); |
|
177 |
|
178 int32_t srcLen = aURI.Length(); |
|
179 int32_t dstLen; |
|
180 rv = unicodeDecoder->GetMaxLength(aURI.get(), srcLen, &dstLen); |
|
181 NS_ENSURE_SUCCESS(rv, rv); |
|
182 |
|
183 char16_t *ustr = (char16_t *) NS_Alloc(dstLen * sizeof(char16_t)); |
|
184 NS_ENSURE_TRUE(ustr, NS_ERROR_OUT_OF_MEMORY); |
|
185 |
|
186 rv = unicodeDecoder->Convert(aURI.get(), &srcLen, ustr, &dstLen); |
|
187 |
|
188 if (NS_SUCCEEDED(rv)) |
|
189 _retval.Assign(ustr, dstLen); |
|
190 |
|
191 NS_Free(ustr); |
|
192 |
|
193 return rv; |
|
194 } |
|
195 |
|
196 NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, |
|
197 const nsACString &aURIFragment, |
|
198 nsAString &_retval) |
|
199 { |
|
200 nsAutoCString unescapedSpec; |
|
201 // skip control octets (0x00 - 0x1f and 0x7f) when unescaping |
|
202 NS_UnescapeURL(PromiseFlatCString(aURIFragment), |
|
203 esc_SkipControl | esc_AlwaysCopy, unescapedSpec); |
|
204 |
|
205 // in case of failure, return escaped URI |
|
206 // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte |
|
207 // sequences are also considered failure in this context |
|
208 if (convertURItoUnicode( |
|
209 PromiseFlatCString(aCharset), unescapedSpec, true, _retval) |
|
210 != NS_OK) |
|
211 // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 |
|
212 CopyUTF8toUTF16(aURIFragment, _retval); |
|
213 return NS_OK; |
|
214 } |
|
215 |
|
216 NS_IMETHODIMP nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString & aCharset, |
|
217 const nsACString & aURIFragment, |
|
218 nsAString &_retval) |
|
219 { |
|
220 nsAutoCString unescapedSpec; |
|
221 NS_UnescapeURL(PromiseFlatCString(aURIFragment), |
|
222 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); |
|
223 // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII |
|
224 // superset since converting "http:" with such an encoding is always a bad |
|
225 // idea. |
|
226 if (!IsUTF8(unescapedSpec) && |
|
227 (aCharset.LowerCaseEqualsLiteral("utf-16") || |
|
228 aCharset.LowerCaseEqualsLiteral("utf-16be") || |
|
229 aCharset.LowerCaseEqualsLiteral("utf-16le") || |
|
230 aCharset.LowerCaseEqualsLiteral("utf-7") || |
|
231 aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){ |
|
232 CopyASCIItoUTF16(aURIFragment, _retval); |
|
233 return NS_OK; |
|
234 } |
|
235 |
|
236 return convertURItoUnicode(PromiseFlatCString(aCharset), unescapedSpec, true, _retval); |
|
237 } |
|
238 |
|
239 //---------------------------------------------------------------------- |