|
1 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
2 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
4 |
|
5 #include "nsEscape.h" |
|
6 #include "nsString.h" |
|
7 #include "nsIURI.h" |
|
8 #include "nsNetUtil.h" |
|
9 #include "nsUrlClassifierUtils.h" |
|
10 #include "nsTArray.h" |
|
11 #include "nsReadableUtils.h" |
|
12 #include "plbase64.h" |
|
13 #include "prprf.h" |
|
14 |
|
15 static char int_to_hex_digit(int32_t i) |
|
16 { |
|
17 NS_ASSERTION((i >= 0) && (i <= 15), "int too big in int_to_hex_digit"); |
|
18 return static_cast<char>(((i < 10) ? (i + '0') : ((i - 10) + 'A'))); |
|
19 } |
|
20 |
|
21 static bool |
|
22 IsDecimal(const nsACString & num) |
|
23 { |
|
24 for (uint32_t i = 0; i < num.Length(); i++) { |
|
25 if (!isdigit(num[i])) { |
|
26 return false; |
|
27 } |
|
28 } |
|
29 |
|
30 return true; |
|
31 } |
|
32 |
|
33 static bool |
|
34 IsHex(const nsACString & num) |
|
35 { |
|
36 if (num.Length() < 3) { |
|
37 return false; |
|
38 } |
|
39 |
|
40 if (num[0] != '0' || !(num[1] == 'x' || num[1] == 'X')) { |
|
41 return false; |
|
42 } |
|
43 |
|
44 for (uint32_t i = 2; i < num.Length(); i++) { |
|
45 if (!isxdigit(num[i])) { |
|
46 return false; |
|
47 } |
|
48 } |
|
49 |
|
50 return true; |
|
51 } |
|
52 |
|
53 static bool |
|
54 IsOctal(const nsACString & num) |
|
55 { |
|
56 if (num.Length() < 2) { |
|
57 return false; |
|
58 } |
|
59 |
|
60 if (num[0] != '0') { |
|
61 return false; |
|
62 } |
|
63 |
|
64 for (uint32_t i = 1; i < num.Length(); i++) { |
|
65 if (!isdigit(num[i]) || num[i] == '8' || num[i] == '9') { |
|
66 return false; |
|
67 } |
|
68 } |
|
69 |
|
70 return true; |
|
71 } |
|
72 |
|
73 nsUrlClassifierUtils::nsUrlClassifierUtils() : mEscapeCharmap(nullptr) |
|
74 { |
|
75 } |
|
76 |
|
77 nsresult |
|
78 nsUrlClassifierUtils::Init() |
|
79 { |
|
80 // Everything but alpha numerics, - and . |
|
81 mEscapeCharmap = new Charmap(0xffffffff, 0xfc009fff, 0xf8000001, 0xf8000001, |
|
82 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); |
|
83 if (!mEscapeCharmap) |
|
84 return NS_ERROR_OUT_OF_MEMORY; |
|
85 return NS_OK; |
|
86 } |
|
87 |
|
// NOTE(review): standard XPCOM macro; presumably supplies the nsISupports
// methods (AddRef/Release/QueryInterface) for nsUrlClassifierUtils with
// nsIUrlClassifierUtils as its exposed interface -- confirm against
// nsISupportsImpl.h.
88 NS_IMPL_ISUPPORTS(nsUrlClassifierUtils, nsIUrlClassifierUtils) |
|
89 |
|
90 ///////////////////////////////////////////////////////////////////////////// |
|
91 // nsIUrlClassifierUtils |
|
92 |
|
93 NS_IMETHODIMP |
|
94 nsUrlClassifierUtils::GetKeyForURI(nsIURI * uri, nsACString & _retval) |
|
95 { |
|
96 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(uri); |
|
97 if (!innerURI) |
|
98 innerURI = uri; |
|
99 |
|
100 nsAutoCString host; |
|
101 innerURI->GetAsciiHost(host); |
|
102 |
|
103 if (host.IsEmpty()) { |
|
104 return NS_ERROR_MALFORMED_URI; |
|
105 } |
|
106 |
|
107 nsresult rv = CanonicalizeHostname(host, _retval); |
|
108 NS_ENSURE_SUCCESS(rv, rv); |
|
109 |
|
110 nsAutoCString path; |
|
111 rv = innerURI->GetPath(path); |
|
112 NS_ENSURE_SUCCESS(rv, rv); |
|
113 |
|
114 // strip out anchors |
|
115 int32_t ref = path.FindChar('#'); |
|
116 if (ref != kNotFound) |
|
117 path.SetLength(ref); |
|
118 |
|
119 nsAutoCString temp; |
|
120 rv = CanonicalizePath(path, temp); |
|
121 NS_ENSURE_SUCCESS(rv, rv); |
|
122 |
|
123 _retval.Append(temp); |
|
124 |
|
125 return NS_OK; |
|
126 } |
|
127 |
|
128 ///////////////////////////////////////////////////////////////////////////// |
|
129 // non-interface methods |
|
130 |
|
131 nsresult |
|
132 nsUrlClassifierUtils::CanonicalizeHostname(const nsACString & hostname, |
|
133 nsACString & _retval) |
|
134 { |
|
135 nsAutoCString unescaped; |
|
136 if (!NS_UnescapeURL(PromiseFlatCString(hostname).get(), |
|
137 PromiseFlatCString(hostname).Length(), |
|
138 0, unescaped)) { |
|
139 unescaped.Assign(hostname); |
|
140 } |
|
141 |
|
142 nsAutoCString cleaned; |
|
143 CleanupHostname(unescaped, cleaned); |
|
144 |
|
145 nsAutoCString temp; |
|
146 ParseIPAddress(cleaned, temp); |
|
147 if (!temp.IsEmpty()) { |
|
148 cleaned.Assign(temp); |
|
149 } |
|
150 |
|
151 ToLowerCase(cleaned); |
|
152 SpecialEncode(cleaned, false, _retval); |
|
153 |
|
154 return NS_OK; |
|
155 } |
|
156 |
|
157 |
|
158 nsresult |
|
159 nsUrlClassifierUtils::CanonicalizePath(const nsACString & path, |
|
160 nsACString & _retval) |
|
161 { |
|
162 _retval.Truncate(); |
|
163 |
|
164 nsAutoCString decodedPath(path); |
|
165 nsAutoCString temp; |
|
166 while (NS_UnescapeURL(decodedPath.get(), decodedPath.Length(), 0, temp)) { |
|
167 decodedPath.Assign(temp); |
|
168 temp.Truncate(); |
|
169 } |
|
170 |
|
171 SpecialEncode(decodedPath, true, _retval); |
|
172 // XXX: lowercase the path? |
|
173 |
|
174 return NS_OK; |
|
175 } |
|
176 |
|
177 void |
|
178 nsUrlClassifierUtils::CleanupHostname(const nsACString & hostname, |
|
179 nsACString & _retval) |
|
180 { |
|
181 _retval.Truncate(); |
|
182 |
|
183 const char* curChar = hostname.BeginReading(); |
|
184 const char* end = hostname.EndReading(); |
|
185 char lastChar = '\0'; |
|
186 while (curChar != end) { |
|
187 unsigned char c = static_cast<unsigned char>(*curChar); |
|
188 if (c == '.' && (lastChar == '\0' || lastChar == '.')) { |
|
189 // skip |
|
190 } else { |
|
191 _retval.Append(*curChar); |
|
192 } |
|
193 lastChar = c; |
|
194 ++curChar; |
|
195 } |
|
196 |
|
197 // cut off trailing dots |
|
198 while (_retval.Length() > 0 && _retval[_retval.Length() - 1] == '.') { |
|
199 _retval.SetLength(_retval.Length() - 1); |
|
200 } |
|
201 } |
|
202 |
|
203 void |
|
204 nsUrlClassifierUtils::ParseIPAddress(const nsACString & host, |
|
205 nsACString & _retval) |
|
206 { |
|
207 _retval.Truncate(); |
|
208 nsACString::const_iterator iter, end; |
|
209 host.BeginReading(iter); |
|
210 host.EndReading(end); |
|
211 |
|
212 if (host.Length() <= 15) { |
|
213 // The Windows resolver allows a 4-part dotted decimal IP address to |
|
214 // have a space followed by any old rubbish, so long as the total length |
|
215 // of the string doesn't get above 15 characters. So, "10.192.95.89 xy" |
|
216 // is resolved to 10.192.95.89. |
|
217 // If the string length is greater than 15 characters, e.g. |
|
218 // "10.192.95.89 xy.wildcard.example.com", it will be resolved through |
|
219 // DNS. |
|
220 |
|
221 if (FindCharInReadable(' ', iter, end)) { |
|
222 end = iter; |
|
223 } |
|
224 } |
|
225 |
|
226 for (host.BeginReading(iter); iter != end; iter++) { |
|
227 if (!(isxdigit(*iter) || *iter == 'x' || *iter == 'X' || *iter == '.')) { |
|
228 // not an IP |
|
229 return; |
|
230 } |
|
231 } |
|
232 |
|
233 host.BeginReading(iter); |
|
234 nsTArray<nsCString> parts; |
|
235 ParseString(PromiseFlatCString(Substring(iter, end)), '.', parts); |
|
236 if (parts.Length() > 4) { |
|
237 return; |
|
238 } |
|
239 |
|
240 // If any potentially-octal numbers (start with 0 but not hex) have |
|
241 // non-octal digits, no part of the ip can be in octal |
|
242 // XXX: this came from the old javascript implementation, is it really |
|
243 // supposed to be like this? |
|
244 bool allowOctal = true; |
|
245 uint32_t i; |
|
246 |
|
247 for (i = 0; i < parts.Length(); i++) { |
|
248 const nsCString& part = parts[i]; |
|
249 if (part[0] == '0') { |
|
250 for (uint32_t j = 1; j < part.Length(); j++) { |
|
251 if (part[j] == 'x') { |
|
252 break; |
|
253 } |
|
254 if (part[j] == '8' || part[j] == '9') { |
|
255 allowOctal = false; |
|
256 break; |
|
257 } |
|
258 } |
|
259 } |
|
260 } |
|
261 |
|
262 for (i = 0; i < parts.Length(); i++) { |
|
263 nsAutoCString canonical; |
|
264 |
|
265 if (i == parts.Length() - 1) { |
|
266 CanonicalNum(parts[i], 5 - parts.Length(), allowOctal, canonical); |
|
267 } else { |
|
268 CanonicalNum(parts[i], 1, allowOctal, canonical); |
|
269 } |
|
270 |
|
271 if (canonical.IsEmpty()) { |
|
272 _retval.Truncate(); |
|
273 return; |
|
274 } |
|
275 |
|
276 if (_retval.IsEmpty()) { |
|
277 _retval.Assign(canonical); |
|
278 } else { |
|
279 _retval.Append('.'); |
|
280 _retval.Append(canonical); |
|
281 } |
|
282 } |
|
283 return; |
|
284 } |
|
285 |
|
286 void |
|
287 nsUrlClassifierUtils::CanonicalNum(const nsACString& num, |
|
288 uint32_t bytes, |
|
289 bool allowOctal, |
|
290 nsACString& _retval) |
|
291 { |
|
292 _retval.Truncate(); |
|
293 |
|
294 if (num.Length() < 1) { |
|
295 return; |
|
296 } |
|
297 |
|
298 uint32_t val; |
|
299 if (allowOctal && IsOctal(num)) { |
|
300 if (PR_sscanf(PromiseFlatCString(num).get(), "%o", &val) != 1) { |
|
301 return; |
|
302 } |
|
303 } else if (IsDecimal(num)) { |
|
304 if (PR_sscanf(PromiseFlatCString(num).get(), "%u", &val) != 1) { |
|
305 return; |
|
306 } |
|
307 } else if (IsHex(num)) { |
|
308 if (PR_sscanf(PromiseFlatCString(num).get(), num[1] == 'X' ? "0X%x" : "0x%x", |
|
309 &val) != 1) { |
|
310 return; |
|
311 } |
|
312 } else { |
|
313 return; |
|
314 } |
|
315 |
|
316 while (bytes--) { |
|
317 char buf[20]; |
|
318 PR_snprintf(buf, sizeof(buf), "%u", val & 0xff); |
|
319 if (_retval.IsEmpty()) { |
|
320 _retval.Assign(buf); |
|
321 } else { |
|
322 _retval = nsDependentCString(buf) + NS_LITERAL_CSTRING(".") + _retval; |
|
323 } |
|
324 val >>= 8; |
|
325 } |
|
326 } |
|
327 |
|
328 // This function will encode all "special" characters in typical url |
|
329 // encoding, that is %hh where h is a valid hex digit. It will also fold |
|
330 // any duplicated slashes. |
|
331 bool |
|
332 nsUrlClassifierUtils::SpecialEncode(const nsACString & url, |
|
333 bool foldSlashes, |
|
334 nsACString & _retval) |
|
335 { |
|
336 bool changed = false; |
|
337 const char* curChar = url.BeginReading(); |
|
338 const char* end = url.EndReading(); |
|
339 |
|
340 unsigned char lastChar = '\0'; |
|
341 while (curChar != end) { |
|
342 unsigned char c = static_cast<unsigned char>(*curChar); |
|
343 if (ShouldURLEscape(c)) { |
|
344 _retval.Append('%'); |
|
345 _retval.Append(int_to_hex_digit(c / 16)); |
|
346 _retval.Append(int_to_hex_digit(c % 16)); |
|
347 |
|
348 changed = true; |
|
349 } else if (foldSlashes && (c == '/' && lastChar == '/')) { |
|
350 // skip |
|
351 } else { |
|
352 _retval.Append(*curChar); |
|
353 } |
|
354 lastChar = c; |
|
355 curChar++; |
|
356 } |
|
357 return changed; |
|
358 } |
|
359 |
|
360 bool |
|
361 nsUrlClassifierUtils::ShouldURLEscape(const unsigned char c) const |
|
362 { |
|
363 return c <= 32 || c == '%' || c >=127; |
|
364 } |