1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/components/url-classifier/nsUrlClassifierUtils.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,364 @@ 1.4 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.5 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.6 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.7 + 1.8 +#include "nsEscape.h" 1.9 +#include "nsString.h" 1.10 +#include "nsIURI.h" 1.11 +#include "nsNetUtil.h" 1.12 +#include "nsUrlClassifierUtils.h" 1.13 +#include "nsTArray.h" 1.14 +#include "nsReadableUtils.h" 1.15 +#include "plbase64.h" 1.16 +#include "prprf.h" 1.17 + 1.18 +static char int_to_hex_digit(int32_t i) 1.19 +{ 1.20 + NS_ASSERTION((i >= 0) && (i <= 15), "int too big in int_to_hex_digit"); 1.21 + return static_cast<char>(((i < 10) ? (i + '0') : ((i - 10) + 'A'))); 1.22 +} 1.23 + 1.24 +static bool 1.25 +IsDecimal(const nsACString & num) 1.26 +{ 1.27 + for (uint32_t i = 0; i < num.Length(); i++) { 1.28 + if (!isdigit(num[i])) { 1.29 + return false; 1.30 + } 1.31 + } 1.32 + 1.33 + return true; 1.34 +} 1.35 + 1.36 +static bool 1.37 +IsHex(const nsACString & num) 1.38 +{ 1.39 + if (num.Length() < 3) { 1.40 + return false; 1.41 + } 1.42 + 1.43 + if (num[0] != '0' || !(num[1] == 'x' || num[1] == 'X')) { 1.44 + return false; 1.45 + } 1.46 + 1.47 + for (uint32_t i = 2; i < num.Length(); i++) { 1.48 + if (!isxdigit(num[i])) { 1.49 + return false; 1.50 + } 1.51 + } 1.52 + 1.53 + return true; 1.54 +} 1.55 + 1.56 +static bool 1.57 +IsOctal(const nsACString & num) 1.58 +{ 1.59 + if (num.Length() < 2) { 1.60 + return false; 1.61 + } 1.62 + 1.63 + if (num[0] != '0') { 1.64 + return false; 1.65 + } 1.66 + 1.67 + for (uint32_t i = 1; i < num.Length(); i++) { 1.68 + if (!isdigit(num[i]) || num[i] == '8' || num[i] == '9') { 1.69 + return false; 1.70 + } 1.71 + } 1.72 + 1.73 + return true; 1.74 +} 1.75 + 1.76 +nsUrlClassifierUtils::nsUrlClassifierUtils() : mEscapeCharmap(nullptr) 1.77 +{ 1.78 +} 1.79 + 1.80 +nsresult 1.81 +nsUrlClassifierUtils::Init() 1.82 +{ 1.83 + // Everything but alpha numerics, - and . 1.84 + mEscapeCharmap = new Charmap(0xffffffff, 0xfc009fff, 0xf8000001, 0xf8000001, 1.85 + 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff); 1.86 + if (!mEscapeCharmap) 1.87 + return NS_ERROR_OUT_OF_MEMORY; 1.88 + return NS_OK; 1.89 +} 1.90 + 1.91 +NS_IMPL_ISUPPORTS(nsUrlClassifierUtils, nsIUrlClassifierUtils) 1.92 + 1.93 +///////////////////////////////////////////////////////////////////////////// 1.94 +// nsIUrlClassifierUtils 1.95 + 1.96 +NS_IMETHODIMP 1.97 +nsUrlClassifierUtils::GetKeyForURI(nsIURI * uri, nsACString & _retval) 1.98 +{ 1.99 + nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(uri); 1.100 + if (!innerURI) 1.101 + innerURI = uri; 1.102 + 1.103 + nsAutoCString host; 1.104 + innerURI->GetAsciiHost(host); 1.105 + 1.106 + if (host.IsEmpty()) { 1.107 + return NS_ERROR_MALFORMED_URI; 1.108 + } 1.109 + 1.110 + nsresult rv = CanonicalizeHostname(host, _retval); 1.111 + NS_ENSURE_SUCCESS(rv, rv); 1.112 + 1.113 + nsAutoCString path; 1.114 + rv = innerURI->GetPath(path); 1.115 + NS_ENSURE_SUCCESS(rv, rv); 1.116 + 1.117 + // strip out anchors 1.118 + int32_t ref = path.FindChar('#'); 1.119 + if (ref != kNotFound) 1.120 + path.SetLength(ref); 1.121 + 1.122 + nsAutoCString temp; 1.123 + rv = CanonicalizePath(path, temp); 1.124 + NS_ENSURE_SUCCESS(rv, rv); 1.125 + 1.126 + _retval.Append(temp); 1.127 + 1.128 + return NS_OK; 1.129 +} 1.130 + 1.131 +///////////////////////////////////////////////////////////////////////////// 1.132 +// non-interface methods 1.133 + 1.134 +nsresult 1.135 +nsUrlClassifierUtils::CanonicalizeHostname(const nsACString & hostname, 1.136 + nsACString & _retval) 1.137 +{ 1.138 + nsAutoCString unescaped; 1.139 + if (!NS_UnescapeURL(PromiseFlatCString(hostname).get(), 1.140 + PromiseFlatCString(hostname).Length(), 1.141 + 0, unescaped)) { 1.142 + unescaped.Assign(hostname); 1.143 + } 1.144 + 1.145 + nsAutoCString cleaned; 1.146 + CleanupHostname(unescaped, cleaned); 1.147 + 1.148 + nsAutoCString temp; 1.149 + ParseIPAddress(cleaned, temp); 1.150 + if (!temp.IsEmpty()) { 1.151 + cleaned.Assign(temp); 1.152 + } 1.153 + 1.154 + ToLowerCase(cleaned); 1.155 + SpecialEncode(cleaned, false, _retval); 1.156 + 1.157 + return NS_OK; 1.158 +} 1.159 + 1.160 + 1.161 +nsresult 1.162 +nsUrlClassifierUtils::CanonicalizePath(const nsACString & path, 1.163 + nsACString & _retval) 1.164 +{ 1.165 + _retval.Truncate(); 1.166 + 1.167 + nsAutoCString decodedPath(path); 1.168 + nsAutoCString temp; 1.169 + while (NS_UnescapeURL(decodedPath.get(), decodedPath.Length(), 0, temp)) { 1.170 + decodedPath.Assign(temp); 1.171 + temp.Truncate(); 1.172 + } 1.173 + 1.174 + SpecialEncode(decodedPath, true, _retval); 1.175 + // XXX: lowercase the path? 1.176 + 1.177 + return NS_OK; 1.178 +} 1.179 + 1.180 +void 1.181 +nsUrlClassifierUtils::CleanupHostname(const nsACString & hostname, 1.182 + nsACString & _retval) 1.183 +{ 1.184 + _retval.Truncate(); 1.185 + 1.186 + const char* curChar = hostname.BeginReading(); 1.187 + const char* end = hostname.EndReading(); 1.188 + char lastChar = '\0'; 1.189 + while (curChar != end) { 1.190 + unsigned char c = static_cast<unsigned char>(*curChar); 1.191 + if (c == '.' && (lastChar == '\0' || lastChar == '.')) { 1.192 + // skip 1.193 + } else { 1.194 + _retval.Append(*curChar); 1.195 + } 1.196 + lastChar = c; 1.197 + ++curChar; 1.198 + } 1.199 + 1.200 + // cut off trailing dots 1.201 + while (_retval.Length() > 0 && _retval[_retval.Length() - 1] == '.') { 1.202 + _retval.SetLength(_retval.Length() - 1); 1.203 + } 1.204 +} 1.205 + 1.206 +void 1.207 +nsUrlClassifierUtils::ParseIPAddress(const nsACString & host, 1.208 + nsACString & _retval) 1.209 +{ 1.210 + _retval.Truncate(); 1.211 + nsACString::const_iterator iter, end; 1.212 + host.BeginReading(iter); 1.213 + host.EndReading(end); 1.214 + 1.215 + if (host.Length() <= 15) { 1.216 + // The Windows resolver allows a 4-part dotted decimal IP address to 1.217 + // have a space followed by any old rubbish, so long as the total length 1.218 + // of the string doesn't get above 15 characters. So, "10.192.95.89 xy" 1.219 + // is resolved to 10.192.95.89. 1.220 + // If the string length is greater than 15 characters, e.g. 1.221 + // "10.192.95.89 xy.wildcard.example.com", it will be resolved through 1.222 + // DNS. 1.223 + 1.224 + if (FindCharInReadable(' ', iter, end)) { 1.225 + end = iter; 1.226 + } 1.227 + } 1.228 + 1.229 + for (host.BeginReading(iter); iter != end; iter++) { 1.230 + if (!(isxdigit(*iter) || *iter == 'x' || *iter == 'X' || *iter == '.')) { 1.231 + // not an IP 1.232 + return; 1.233 + } 1.234 + } 1.235 + 1.236 + host.BeginReading(iter); 1.237 + nsTArray<nsCString> parts; 1.238 + ParseString(PromiseFlatCString(Substring(iter, end)), '.', parts); 1.239 + if (parts.Length() > 4) { 1.240 + return; 1.241 + } 1.242 + 1.243 + // If any potentially-octal numbers (start with 0 but not hex) have 1.244 + // non-octal digits, no part of the ip can be in octal 1.245 + // XXX: this came from the old javascript implementation, is it really 1.246 + // supposed to be like this? 1.247 + bool allowOctal = true; 1.248 + uint32_t i; 1.249 + 1.250 + for (i = 0; i < parts.Length(); i++) { 1.251 + const nsCString& part = parts[i]; 1.252 + if (part[0] == '0') { 1.253 + for (uint32_t j = 1; j < part.Length(); j++) { 1.254 + if (part[j] == 'x') { 1.255 + break; 1.256 + } 1.257 + if (part[j] == '8' || part[j] == '9') { 1.258 + allowOctal = false; 1.259 + break; 1.260 + } 1.261 + } 1.262 + } 1.263 + } 1.264 + 1.265 + for (i = 0; i < parts.Length(); i++) { 1.266 + nsAutoCString canonical; 1.267 + 1.268 + if (i == parts.Length() - 1) { 1.269 + CanonicalNum(parts[i], 5 - parts.Length(), allowOctal, canonical); 1.270 + } else { 1.271 + CanonicalNum(parts[i], 1, allowOctal, canonical); 1.272 + } 1.273 + 1.274 + if (canonical.IsEmpty()) { 1.275 + _retval.Truncate(); 1.276 + return; 1.277 + } 1.278 + 1.279 + if (_retval.IsEmpty()) { 1.280 + _retval.Assign(canonical); 1.281 + } else { 1.282 + _retval.Append('.'); 1.283 + _retval.Append(canonical); 1.284 + } 1.285 + } 1.286 + return; 1.287 +} 1.288 + 1.289 +void 1.290 +nsUrlClassifierUtils::CanonicalNum(const nsACString& num, 1.291 + uint32_t bytes, 1.292 + bool allowOctal, 1.293 + nsACString& _retval) 1.294 +{ 1.295 + _retval.Truncate(); 1.296 + 1.297 + if (num.Length() < 1) { 1.298 + return; 1.299 + } 1.300 + 1.301 + uint32_t val; 1.302 + if (allowOctal && IsOctal(num)) { 1.303 + if (PR_sscanf(PromiseFlatCString(num).get(), "%o", &val) != 1) { 1.304 + return; 1.305 + } 1.306 + } else if (IsDecimal(num)) { 1.307 + if (PR_sscanf(PromiseFlatCString(num).get(), "%u", &val) != 1) { 1.308 + return; 1.309 + } 1.310 + } else if (IsHex(num)) { 1.311 + if (PR_sscanf(PromiseFlatCString(num).get(), num[1] == 'X' ? "0X%x" : "0x%x", 1.312 + &val) != 1) { 1.313 + return; 1.314 + } 1.315 + } else { 1.316 + return; 1.317 + } 1.318 + 1.319 + while (bytes--) { 1.320 + char buf[20]; 1.321 + PR_snprintf(buf, sizeof(buf), "%u", val & 0xff); 1.322 + if (_retval.IsEmpty()) { 1.323 + _retval.Assign(buf); 1.324 + } else { 1.325 + _retval = nsDependentCString(buf) + NS_LITERAL_CSTRING(".") + _retval; 1.326 + } 1.327 + val >>= 8; 1.328 + } 1.329 +} 1.330 + 1.331 +// This function will encode all "special" characters in typical url 1.332 +// encoding, that is %hh where h is a valid hex digit. It will also fold 1.333 +// any duplicated slashes. 1.334 +bool 1.335 +nsUrlClassifierUtils::SpecialEncode(const nsACString & url, 1.336 + bool foldSlashes, 1.337 + nsACString & _retval) 1.338 +{ 1.339 + bool changed = false; 1.340 + const char* curChar = url.BeginReading(); 1.341 + const char* end = url.EndReading(); 1.342 + 1.343 + unsigned char lastChar = '\0'; 1.344 + while (curChar != end) { 1.345 + unsigned char c = static_cast<unsigned char>(*curChar); 1.346 + if (ShouldURLEscape(c)) { 1.347 + _retval.Append('%'); 1.348 + _retval.Append(int_to_hex_digit(c / 16)); 1.349 + _retval.Append(int_to_hex_digit(c % 16)); 1.350 + 1.351 + changed = true; 1.352 + } else if (foldSlashes && (c == '/' && lastChar == '/')) { 1.353 + // skip 1.354 + } else { 1.355 + _retval.Append(*curChar); 1.356 + } 1.357 + lastChar = c; 1.358 + curChar++; 1.359 + } 1.360 + return changed; 1.361 +} 1.362 + 1.363 +bool 1.364 +nsUrlClassifierUtils::ShouldURLEscape(const unsigned char c) const 1.365 +{ 1.366 + return c <= 32 || c == '%' || c >=127; 1.367 +}