Fri, 16 Jan 2015 18:13:44 +0100
Integrate suggestion from review to improve consistency with existing code.
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 #include "nsEscape.h"
6 #include "nsString.h"
7 #include "nsIURI.h"
8 #include "nsNetUtil.h"
9 #include "nsUrlClassifierUtils.h"
10 #include "nsTArray.h"
11 #include "nsReadableUtils.h"
12 #include "plbase64.h"
13 #include "prprf.h"
15 static char int_to_hex_digit(int32_t i)
16 {
17 NS_ASSERTION((i >= 0) && (i <= 15), "int too big in int_to_hex_digit");
18 return static_cast<char>(((i < 10) ? (i + '0') : ((i - 10) + 'A')));
19 }
21 static bool
22 IsDecimal(const nsACString & num)
23 {
24 for (uint32_t i = 0; i < num.Length(); i++) {
25 if (!isdigit(num[i])) {
26 return false;
27 }
28 }
30 return true;
31 }
33 static bool
34 IsHex(const nsACString & num)
35 {
36 if (num.Length() < 3) {
37 return false;
38 }
40 if (num[0] != '0' || !(num[1] == 'x' || num[1] == 'X')) {
41 return false;
42 }
44 for (uint32_t i = 2; i < num.Length(); i++) {
45 if (!isxdigit(num[i])) {
46 return false;
47 }
48 }
50 return true;
51 }
53 static bool
54 IsOctal(const nsACString & num)
55 {
56 if (num.Length() < 2) {
57 return false;
58 }
60 if (num[0] != '0') {
61 return false;
62 }
64 for (uint32_t i = 1; i < num.Length(); i++) {
65 if (!isdigit(num[i]) || num[i] == '8' || num[i] == '9') {
66 return false;
67 }
68 }
70 return true;
71 }
73 nsUrlClassifierUtils::nsUrlClassifierUtils() : mEscapeCharmap(nullptr)
74 {
75 }
77 nsresult
78 nsUrlClassifierUtils::Init()
79 {
80 // Everything but alpha numerics, - and .
81 mEscapeCharmap = new Charmap(0xffffffff, 0xfc009fff, 0xf8000001, 0xf8000001,
82 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);
83 if (!mEscapeCharmap)
84 return NS_ERROR_OUT_OF_MEMORY;
85 return NS_OK;
86 }
// Interface boilerplate: names nsIUrlClassifierUtils as the interface that
// nsUrlClassifierUtils implements. NOTE(review): presumably this macro
// supplies the nsISupports method definitions -- its expansion is not
// visible in this file.
88 NS_IMPL_ISUPPORTS(nsUrlClassifierUtils, nsIUrlClassifierUtils)
90 /////////////////////////////////////////////////////////////////////////////
91 // nsIUrlClassifierUtils
93 NS_IMETHODIMP
94 nsUrlClassifierUtils::GetKeyForURI(nsIURI * uri, nsACString & _retval)
95 {
96 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(uri);
97 if (!innerURI)
98 innerURI = uri;
100 nsAutoCString host;
101 innerURI->GetAsciiHost(host);
103 if (host.IsEmpty()) {
104 return NS_ERROR_MALFORMED_URI;
105 }
107 nsresult rv = CanonicalizeHostname(host, _retval);
108 NS_ENSURE_SUCCESS(rv, rv);
110 nsAutoCString path;
111 rv = innerURI->GetPath(path);
112 NS_ENSURE_SUCCESS(rv, rv);
114 // strip out anchors
115 int32_t ref = path.FindChar('#');
116 if (ref != kNotFound)
117 path.SetLength(ref);
119 nsAutoCString temp;
120 rv = CanonicalizePath(path, temp);
121 NS_ENSURE_SUCCESS(rv, rv);
123 _retval.Append(temp);
125 return NS_OK;
126 }
128 /////////////////////////////////////////////////////////////////////////////
129 // non-interface methods
131 nsresult
132 nsUrlClassifierUtils::CanonicalizeHostname(const nsACString & hostname,
133 nsACString & _retval)
134 {
135 nsAutoCString unescaped;
136 if (!NS_UnescapeURL(PromiseFlatCString(hostname).get(),
137 PromiseFlatCString(hostname).Length(),
138 0, unescaped)) {
139 unescaped.Assign(hostname);
140 }
142 nsAutoCString cleaned;
143 CleanupHostname(unescaped, cleaned);
145 nsAutoCString temp;
146 ParseIPAddress(cleaned, temp);
147 if (!temp.IsEmpty()) {
148 cleaned.Assign(temp);
149 }
151 ToLowerCase(cleaned);
152 SpecialEncode(cleaned, false, _retval);
154 return NS_OK;
155 }
158 nsresult
159 nsUrlClassifierUtils::CanonicalizePath(const nsACString & path,
160 nsACString & _retval)
161 {
162 _retval.Truncate();
164 nsAutoCString decodedPath(path);
165 nsAutoCString temp;
166 while (NS_UnescapeURL(decodedPath.get(), decodedPath.Length(), 0, temp)) {
167 decodedPath.Assign(temp);
168 temp.Truncate();
169 }
171 SpecialEncode(decodedPath, true, _retval);
172 // XXX: lowercase the path?
174 return NS_OK;
175 }
177 void
178 nsUrlClassifierUtils::CleanupHostname(const nsACString & hostname,
179 nsACString & _retval)
180 {
181 _retval.Truncate();
183 const char* curChar = hostname.BeginReading();
184 const char* end = hostname.EndReading();
185 char lastChar = '\0';
186 while (curChar != end) {
187 unsigned char c = static_cast<unsigned char>(*curChar);
188 if (c == '.' && (lastChar == '\0' || lastChar == '.')) {
189 // skip
190 } else {
191 _retval.Append(*curChar);
192 }
193 lastChar = c;
194 ++curChar;
195 }
197 // cut off trailing dots
198 while (_retval.Length() > 0 && _retval[_retval.Length() - 1] == '.') {
199 _retval.SetLength(_retval.Length() - 1);
200 }
201 }
203 void
204 nsUrlClassifierUtils::ParseIPAddress(const nsACString & host,
205 nsACString & _retval)
206 {
207 _retval.Truncate();
208 nsACString::const_iterator iter, end;
209 host.BeginReading(iter);
210 host.EndReading(end);
212 if (host.Length() <= 15) {
213 // The Windows resolver allows a 4-part dotted decimal IP address to
214 // have a space followed by any old rubbish, so long as the total length
215 // of the string doesn't get above 15 characters. So, "10.192.95.89 xy"
216 // is resolved to 10.192.95.89.
217 // If the string length is greater than 15 characters, e.g.
218 // "10.192.95.89 xy.wildcard.example.com", it will be resolved through
219 // DNS.
221 if (FindCharInReadable(' ', iter, end)) {
222 end = iter;
223 }
224 }
226 for (host.BeginReading(iter); iter != end; iter++) {
227 if (!(isxdigit(*iter) || *iter == 'x' || *iter == 'X' || *iter == '.')) {
228 // not an IP
229 return;
230 }
231 }
233 host.BeginReading(iter);
234 nsTArray<nsCString> parts;
235 ParseString(PromiseFlatCString(Substring(iter, end)), '.', parts);
236 if (parts.Length() > 4) {
237 return;
238 }
240 // If any potentially-octal numbers (start with 0 but not hex) have
241 // non-octal digits, no part of the ip can be in octal
242 // XXX: this came from the old javascript implementation, is it really
243 // supposed to be like this?
244 bool allowOctal = true;
245 uint32_t i;
247 for (i = 0; i < parts.Length(); i++) {
248 const nsCString& part = parts[i];
249 if (part[0] == '0') {
250 for (uint32_t j = 1; j < part.Length(); j++) {
251 if (part[j] == 'x') {
252 break;
253 }
254 if (part[j] == '8' || part[j] == '9') {
255 allowOctal = false;
256 break;
257 }
258 }
259 }
260 }
262 for (i = 0; i < parts.Length(); i++) {
263 nsAutoCString canonical;
265 if (i == parts.Length() - 1) {
266 CanonicalNum(parts[i], 5 - parts.Length(), allowOctal, canonical);
267 } else {
268 CanonicalNum(parts[i], 1, allowOctal, canonical);
269 }
271 if (canonical.IsEmpty()) {
272 _retval.Truncate();
273 return;
274 }
276 if (_retval.IsEmpty()) {
277 _retval.Assign(canonical);
278 } else {
279 _retval.Append('.');
280 _retval.Append(canonical);
281 }
282 }
283 return;
284 }
286 void
287 nsUrlClassifierUtils::CanonicalNum(const nsACString& num,
288 uint32_t bytes,
289 bool allowOctal,
290 nsACString& _retval)
291 {
292 _retval.Truncate();
294 if (num.Length() < 1) {
295 return;
296 }
298 uint32_t val;
299 if (allowOctal && IsOctal(num)) {
300 if (PR_sscanf(PromiseFlatCString(num).get(), "%o", &val) != 1) {
301 return;
302 }
303 } else if (IsDecimal(num)) {
304 if (PR_sscanf(PromiseFlatCString(num).get(), "%u", &val) != 1) {
305 return;
306 }
307 } else if (IsHex(num)) {
308 if (PR_sscanf(PromiseFlatCString(num).get(), num[1] == 'X' ? "0X%x" : "0x%x",
309 &val) != 1) {
310 return;
311 }
312 } else {
313 return;
314 }
316 while (bytes--) {
317 char buf[20];
318 PR_snprintf(buf, sizeof(buf), "%u", val & 0xff);
319 if (_retval.IsEmpty()) {
320 _retval.Assign(buf);
321 } else {
322 _retval = nsDependentCString(buf) + NS_LITERAL_CSTRING(".") + _retval;
323 }
324 val >>= 8;
325 }
326 }
328 // This function will encode all "special" characters in typical url
329 // encoding, that is %hh where h is a valid hex digit. It will also fold
330 // any duplicated slashes.
331 bool
332 nsUrlClassifierUtils::SpecialEncode(const nsACString & url,
333 bool foldSlashes,
334 nsACString & _retval)
335 {
336 bool changed = false;
337 const char* curChar = url.BeginReading();
338 const char* end = url.EndReading();
340 unsigned char lastChar = '\0';
341 while (curChar != end) {
342 unsigned char c = static_cast<unsigned char>(*curChar);
343 if (ShouldURLEscape(c)) {
344 _retval.Append('%');
345 _retval.Append(int_to_hex_digit(c / 16));
346 _retval.Append(int_to_hex_digit(c % 16));
348 changed = true;
349 } else if (foldSlashes && (c == '/' && lastChar == '/')) {
350 // skip
351 } else {
352 _retval.Append(*curChar);
353 }
354 lastChar = c;
355 curChar++;
356 }
357 return changed;
358 }
360 bool
361 nsUrlClassifierUtils::ShouldURLEscape(const unsigned char c) const
362 {
363 return c <= 32 || c == '%' || c >=127;
364 }