1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/netwerk/base/src/nsURLHelper.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,1105 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* vim:set ts=4 sw=4 sts=4 et cindent: */ 1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#include "mozilla/RangedPtr.h" 1.11 + 1.12 +#include "nsURLHelper.h" 1.13 +#include "nsIFile.h" 1.14 +#include "nsIURLParser.h" 1.15 +#include "nsCOMPtr.h" 1.16 +#include "nsCRT.h" 1.17 +#include "nsNetCID.h" 1.18 +#include "prnetdb.h" 1.19 + 1.20 +using namespace mozilla; 1.21 + 1.22 +//---------------------------------------------------------------------------- 1.23 +// Init/Shutdown 1.24 +//---------------------------------------------------------------------------- 1.25 + 1.26 +static bool gInitialized = false; 1.27 +static nsIURLParser *gNoAuthURLParser = nullptr; 1.28 +static nsIURLParser *gAuthURLParser = nullptr; 1.29 +static nsIURLParser *gStdURLParser = nullptr; 1.30 + 1.31 +static void 1.32 +InitGlobals() 1.33 +{ 1.34 + nsCOMPtr<nsIURLParser> parser; 1.35 + 1.36 + parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID); 1.37 + NS_ASSERTION(parser, "failed getting 'noauth' url parser"); 1.38 + if (parser) { 1.39 + gNoAuthURLParser = parser.get(); 1.40 + NS_ADDREF(gNoAuthURLParser); 1.41 + } 1.42 + 1.43 + parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID); 1.44 + NS_ASSERTION(parser, "failed getting 'auth' url parser"); 1.45 + if (parser) { 1.46 + gAuthURLParser = parser.get(); 1.47 + NS_ADDREF(gAuthURLParser); 1.48 + } 1.49 + 1.50 + parser = do_GetService(NS_STDURLPARSER_CONTRACTID); 1.51 + NS_ASSERTION(parser, "failed getting 'std' url parser"); 1.52 + if (parser) { 1.53 + gStdURLParser = parser.get(); 1.54 + NS_ADDREF(gStdURLParser); 1.55 + } 1.56 + 1.57 + gInitialized = true; 1.58 +} 1.59 + 1.60 +void 1.61 +net_ShutdownURLHelper() 1.62 +{ 1.63 + if (gInitialized) { 1.64 + NS_IF_RELEASE(gNoAuthURLParser); 1.65 + NS_IF_RELEASE(gAuthURLParser); 1.66 + NS_IF_RELEASE(gStdURLParser); 1.67 + gInitialized = false; 1.68 + } 1.69 +} 1.70 + 1.71 +//---------------------------------------------------------------------------- 1.72 +// nsIURLParser getters 1.73 +//---------------------------------------------------------------------------- 1.74 + 1.75 +nsIURLParser * 1.76 +net_GetAuthURLParser() 1.77 +{ 1.78 + if (!gInitialized) 1.79 + InitGlobals(); 1.80 + return gAuthURLParser; 1.81 +} 1.82 + 1.83 +nsIURLParser * 1.84 +net_GetNoAuthURLParser() 1.85 +{ 1.86 + if (!gInitialized) 1.87 + InitGlobals(); 1.88 + return gNoAuthURLParser; 1.89 +} 1.90 + 1.91 +nsIURLParser * 1.92 +net_GetStdURLParser() 1.93 +{ 1.94 + if (!gInitialized) 1.95 + InitGlobals(); 1.96 + return gStdURLParser; 1.97 +} 1.98 + 1.99 +//--------------------------------------------------------------------------- 1.100 +// GetFileFromURLSpec implementations 1.101 +//--------------------------------------------------------------------------- 1.102 +nsresult 1.103 +net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result) 1.104 +{ 1.105 + nsAutoCString escPath; 1.106 + nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); 1.107 + if (NS_FAILED(rv)) 1.108 + return rv; 1.109 + 1.110 + if (escPath.Last() != '/') { 1.111 + escPath += '/'; 1.112 + } 1.113 + 1.114 + result = escPath; 1.115 + return NS_OK; 1.116 +} 1.117 + 1.118 +nsresult 1.119 +net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result) 1.120 +{ 1.121 + nsAutoCString escPath; 1.122 + nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); 1.123 + if (NS_FAILED(rv)) 1.124 + return rv; 1.125 + 1.126 + // if this file references a directory, then we need to ensure that the 1.127 + // URL ends with a slash. this is important since it affects the rules 1.128 + // for relative URL resolution when this URL is used as a base URL. 1.129 + // if the file does not exist, then we make no assumption about its type, 1.130 + // and simply leave the URL unmodified. 1.131 + if (escPath.Last() != '/') { 1.132 + bool dir; 1.133 + rv = aFile->IsDirectory(&dir); 1.134 + if (NS_SUCCEEDED(rv) && dir) 1.135 + escPath += '/'; 1.136 + } 1.137 + 1.138 + result = escPath; 1.139 + return NS_OK; 1.140 +} 1.141 + 1.142 +//---------------------------------------------------------------------------- 1.143 +// file:// URL parsing 1.144 +//---------------------------------------------------------------------------- 1.145 + 1.146 +nsresult 1.147 +net_ParseFileURL(const nsACString &inURL, 1.148 + nsACString &outDirectory, 1.149 + nsACString &outFileBaseName, 1.150 + nsACString &outFileExtension) 1.151 +{ 1.152 + nsresult rv; 1.153 + 1.154 + outDirectory.Truncate(); 1.155 + outFileBaseName.Truncate(); 1.156 + outFileExtension.Truncate(); 1.157 + 1.158 + const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL); 1.159 + const char *url = flatURL.get(); 1.160 + 1.161 + uint32_t schemeBeg, schemeEnd; 1.162 + rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr); 1.163 + if (NS_FAILED(rv)) return rv; 1.164 + 1.165 + if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) { 1.166 + NS_ERROR("must be a file:// url"); 1.167 + return NS_ERROR_UNEXPECTED; 1.168 + } 1.169 + 1.170 + nsIURLParser *parser = net_GetNoAuthURLParser(); 1.171 + NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED); 1.172 + 1.173 + uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos; 1.174 + int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen; 1.175 + 1.176 + // invoke the parser to extract the URL path 1.177 + rv = parser->ParseURL(url, flatURL.Length(), 1.178 + nullptr, nullptr, // don't care about scheme 1.179 + nullptr, nullptr, // don't care about authority 1.180 + &pathPos, &pathLen); 1.181 + if (NS_FAILED(rv)) return rv; 1.182 + 1.183 + // invoke the parser to extract filepath from the path 1.184 + rv = parser->ParsePath(url + pathPos, pathLen, 1.185 + &filepathPos, &filepathLen, 1.186 + nullptr, nullptr, // don't care about query 1.187 + nullptr, nullptr); // don't care about ref 1.188 + if (NS_FAILED(rv)) return rv; 1.189 + 1.190 + filepathPos += pathPos; 1.191 + 1.192 + // invoke the parser to extract the directory and filename from filepath 1.193 + rv = parser->ParseFilePath(url + filepathPos, filepathLen, 1.194 + &directoryPos, &directoryLen, 1.195 + &basenamePos, &basenameLen, 1.196 + &extensionPos, &extensionLen); 1.197 + if (NS_FAILED(rv)) return rv; 1.198 + 1.199 + if (directoryLen > 0) 1.200 + outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen); 1.201 + if (basenameLen > 0) 1.202 + outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen); 1.203 + if (extensionLen > 0) 1.204 + outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen); 1.205 + // since we are using a no-auth url parser, there will never be a host 1.206 + // XXX not strictly true... file://localhost/foo/bar.html is a valid URL 1.207 + 1.208 + return NS_OK; 1.209 +} 1.210 + 1.211 +//---------------------------------------------------------------------------- 1.212 +// path manipulation functions 1.213 +//---------------------------------------------------------------------------- 1.214 + 1.215 +// Replace all /./ with a / while resolving URLs 1.216 +// But only till #? 1.217 +void 1.218 +net_CoalesceDirs(netCoalesceFlags flags, char* path) 1.219 +{ 1.220 + /* Stolen from the old netlib's mkparse.c. 1.221 + * 1.222 + * modifies a url of the form /foo/../foo1 -> /foo1 1.223 + * and /foo/./foo1 -> /foo/foo1 1.224 + * and /foo/foo1/.. -> /foo/ 1.225 + */ 1.226 + char *fwdPtr = path; 1.227 + char *urlPtr = path; 1.228 + char *lastslash = path; 1.229 + uint32_t traversal = 0; 1.230 + uint32_t special_ftp_len = 0; 1.231 + 1.232 + /* Remember if this url is a special ftp one: */ 1.233 + if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) 1.234 + { 1.235 + /* some schemes (for example ftp) have the speciality that 1.236 + the path can begin // or /%2F to mark the root of the 1.237 + servers filesystem, a simple / only marks the root relative 1.238 + to the user loging in. We remember the length of the marker */ 1.239 + if (nsCRT::strncasecmp(path,"/%2F",4) == 0) 1.240 + special_ftp_len = 4; 1.241 + else if (nsCRT::strncmp(path,"//",2) == 0 ) 1.242 + special_ftp_len = 2; 1.243 + } 1.244 + 1.245 + /* find the last slash before # or ? */ 1.246 + for(; (*fwdPtr != '\0') && 1.247 + (*fwdPtr != '?') && 1.248 + (*fwdPtr != '#'); ++fwdPtr) 1.249 + { 1.250 + } 1.251 + 1.252 + /* found nothing, but go back one only */ 1.253 + /* if there is something to go back to */ 1.254 + if (fwdPtr != path && *fwdPtr == '\0') 1.255 + { 1.256 + --fwdPtr; 1.257 + } 1.258 + 1.259 + /* search the slash */ 1.260 + for(; (fwdPtr != path) && 1.261 + (*fwdPtr != '/'); --fwdPtr) 1.262 + { 1.263 + } 1.264 + lastslash = fwdPtr; 1.265 + fwdPtr = path; 1.266 + 1.267 + /* replace all %2E or %2e with . in the path */ 1.268 + /* but stop at lastchar if non null */ 1.269 + for(; (*fwdPtr != '\0') && 1.270 + (*fwdPtr != '?') && 1.271 + (*fwdPtr != '#') && 1.272 + (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr) 1.273 + { 1.274 + if (*fwdPtr == '%' && *(fwdPtr+1) == '2' && 1.275 + (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e')) 1.276 + { 1.277 + *urlPtr++ = '.'; 1.278 + ++fwdPtr; 1.279 + ++fwdPtr; 1.280 + } 1.281 + else 1.282 + { 1.283 + *urlPtr++ = *fwdPtr; 1.284 + } 1.285 + } 1.286 + // Copy remaining stuff past the #?; 1.287 + for (; *fwdPtr != '\0'; ++fwdPtr) 1.288 + { 1.289 + *urlPtr++ = *fwdPtr; 1.290 + } 1.291 + *urlPtr = '\0'; // terminate the url 1.292 + 1.293 + // start again, this time for real 1.294 + fwdPtr = path; 1.295 + urlPtr = path; 1.296 + 1.297 + for(; (*fwdPtr != '\0') && 1.298 + (*fwdPtr != '?') && 1.299 + (*fwdPtr != '#'); ++fwdPtr) 1.300 + { 1.301 + if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' ) 1.302 + { 1.303 + // remove . followed by slash 1.304 + ++fwdPtr; 1.305 + } 1.306 + else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' && 1.307 + (*(fwdPtr+3) == '/' || 1.308 + *(fwdPtr+3) == '\0' || // This will take care of 1.309 + *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag 1.310 + *(fwdPtr+3) == '#')) 1.311 + { 1.312 + // remove foo/.. 1.313 + // reverse the urlPtr to the previous slash if possible 1.314 + // if url does not allow relative root then drop .. above root 1.315 + // otherwise retain them in the path 1.316 + if(traversal > 0 || !(flags & 1.317 + NET_COALESCE_ALLOW_RELATIVE_ROOT)) 1.318 + { 1.319 + if (urlPtr != path) 1.320 + urlPtr--; // we must be going back at least by one 1.321 + for(;*urlPtr != '/' && urlPtr != path; urlPtr--) 1.322 + ; // null body 1.323 + --traversal; // count back 1.324 + // forward the fwdPtr past the ../ 1.325 + fwdPtr += 2; 1.326 + // if we have reached the beginning of the path 1.327 + // while searching for the previous / and we remember 1.328 + // that it is an url that begins with /%2F then 1.329 + // advance urlPtr again by 3 chars because /%2F already 1.330 + // marks the root of the path 1.331 + if (urlPtr == path && special_ftp_len > 3) 1.332 + { 1.333 + ++urlPtr; 1.334 + ++urlPtr; 1.335 + ++urlPtr; 1.336 + } 1.337 + // special case if we have reached the end 1.338 + // to preserve the last / 1.339 + if (*fwdPtr == '.' && *(fwdPtr+1) == '\0') 1.340 + ++urlPtr; 1.341 + } 1.342 + else 1.343 + { 1.344 + // there are to much /.. in this path, just copy them instead. 1.345 + // forward the urlPtr past the /.. and copying it 1.346 + 1.347 + // However if we remember it is an url that starts with 1.348 + // /%2F and urlPtr just points at the "F" of "/%2F" then do 1.349 + // not overwrite it with the /, just copy .. and move forward 1.350 + // urlPtr. 1.351 + if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1) 1.352 + ++urlPtr; 1.353 + else 1.354 + *urlPtr++ = *fwdPtr; 1.355 + ++fwdPtr; 1.356 + *urlPtr++ = *fwdPtr; 1.357 + ++fwdPtr; 1.358 + *urlPtr++ = *fwdPtr; 1.359 + } 1.360 + } 1.361 + else 1.362 + { 1.363 + // count the hierachie, but only if we do not have reached 1.364 + // the root of some special urls with a special root marker 1.365 + if (*fwdPtr == '/' && *(fwdPtr+1) != '.' && 1.366 + (special_ftp_len != 2 || *(fwdPtr+1) != '/')) 1.367 + traversal++; 1.368 + // copy the url incrementaly 1.369 + *urlPtr++ = *fwdPtr; 1.370 + } 1.371 + } 1.372 + 1.373 + /* 1.374 + * Now lets remove trailing . case 1.375 + * /foo/foo1/. -> /foo/foo1/ 1.376 + */ 1.377 + 1.378 + if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/')) 1.379 + urlPtr--; 1.380 + 1.381 + // Copy remaining stuff past the #?; 1.382 + for (; *fwdPtr != '\0'; ++fwdPtr) 1.383 + { 1.384 + *urlPtr++ = *fwdPtr; 1.385 + } 1.386 + *urlPtr = '\0'; // terminate the url 1.387 +} 1.388 + 1.389 +nsresult 1.390 +net_ResolveRelativePath(const nsACString &relativePath, 1.391 + const nsACString &basePath, 1.392 + nsACString &result) 1.393 +{ 1.394 + nsAutoCString name; 1.395 + nsAutoCString path(basePath); 1.396 + bool needsDelim = false; 1.397 + 1.398 + if ( !path.IsEmpty() ) { 1.399 + char16_t last = path.Last(); 1.400 + needsDelim = !(last == '/'); 1.401 + } 1.402 + 1.403 + nsACString::const_iterator beg, end; 1.404 + relativePath.BeginReading(beg); 1.405 + relativePath.EndReading(end); 1.406 + 1.407 + bool stop = false; 1.408 + char c; 1.409 + for (; !stop; ++beg) { 1.410 + c = (beg == end) ? '\0' : *beg; 1.411 + //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get()); 1.412 + switch (c) { 1.413 + case '\0': 1.414 + case '#': 1.415 + case '?': 1.416 + stop = true; 1.417 + // fall through... 1.418 + case '/': 1.419 + // delimiter found 1.420 + if (name.EqualsLiteral("..")) { 1.421 + // pop path 1.422 + // If we already have the delim at end, then 1.423 + // skip over that when searching for next one to the left 1.424 + int32_t offset = path.Length() - (needsDelim ? 1 : 2); 1.425 + // First check for errors 1.426 + if (offset < 0 ) 1.427 + return NS_ERROR_MALFORMED_URI; 1.428 + int32_t pos = path.RFind("/", false, offset); 1.429 + if (pos >= 0) 1.430 + path.Truncate(pos + 1); 1.431 + else 1.432 + path.Truncate(); 1.433 + } 1.434 + else if (name.IsEmpty() || name.EqualsLiteral(".")) { 1.435 + // do nothing 1.436 + } 1.437 + else { 1.438 + // append name to path 1.439 + if (needsDelim) 1.440 + path += '/'; 1.441 + path += name; 1.442 + needsDelim = true; 1.443 + } 1.444 + name.Truncate(); 1.445 + break; 1.446 + 1.447 + default: 1.448 + // append char to name 1.449 + name += c; 1.450 + } 1.451 + } 1.452 + // append anything left on relativePath (e.g. #..., ;..., ?...) 1.453 + if (c != '\0') 1.454 + path += Substring(--beg, end); 1.455 + 1.456 + result = path; 1.457 + return NS_OK; 1.458 +} 1.459 + 1.460 +//---------------------------------------------------------------------------- 1.461 +// scheme fu 1.462 +//---------------------------------------------------------------------------- 1.463 + 1.464 +/* Extract URI-Scheme if possible */ 1.465 +nsresult 1.466 +net_ExtractURLScheme(const nsACString &inURI, 1.467 + uint32_t *startPos, 1.468 + uint32_t *endPos, 1.469 + nsACString *scheme) 1.470 +{ 1.471 + // search for something up to a colon, and call it the scheme 1.472 + const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI); 1.473 + const char* uri_start = flatURI.get(); 1.474 + const char* uri = uri_start; 1.475 + 1.476 + if (!uri) 1.477 + return NS_ERROR_MALFORMED_URI; 1.478 + 1.479 + // skip leading white space 1.480 + while (nsCRT::IsAsciiSpace(*uri)) 1.481 + uri++; 1.482 + 1.483 + uint32_t start = uri - uri_start; 1.484 + if (startPos) { 1.485 + *startPos = start; 1.486 + } 1.487 + 1.488 + uint32_t length = 0; 1.489 + char c; 1.490 + while ((c = *uri++) != '\0') { 1.491 + // First char must be Alpha 1.492 + if (length == 0 && nsCRT::IsAsciiAlpha(c)) { 1.493 + length++; 1.494 + } 1.495 + // Next chars can be alpha + digit + some special chars 1.496 + else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 1.497 + nsCRT::IsAsciiDigit(c) || c == '+' || 1.498 + c == '.' || c == '-')) { 1.499 + length++; 1.500 + } 1.501 + // stop if colon reached but not as first char 1.502 + else if (c == ':' && length > 0) { 1.503 + if (endPos) { 1.504 + *endPos = start + length; 1.505 + } 1.506 + 1.507 + if (scheme) 1.508 + scheme->Assign(Substring(inURI, start, length)); 1.509 + return NS_OK; 1.510 + } 1.511 + else 1.512 + break; 1.513 + } 1.514 + return NS_ERROR_MALFORMED_URI; 1.515 +} 1.516 + 1.517 +bool 1.518 +net_IsValidScheme(const char *scheme, uint32_t schemeLen) 1.519 +{ 1.520 + // first char must be alpha 1.521 + if (!nsCRT::IsAsciiAlpha(*scheme)) 1.522 + return false; 1.523 + 1.524 + // nsCStrings may have embedded nulls -- reject those too 1.525 + for (; schemeLen; ++scheme, --schemeLen) { 1.526 + if (!(nsCRT::IsAsciiAlpha(*scheme) || 1.527 + nsCRT::IsAsciiDigit(*scheme) || 1.528 + *scheme == '+' || 1.529 + *scheme == '.' || 1.530 + *scheme == '-')) 1.531 + return false; 1.532 + } 1.533 + 1.534 + return true; 1.535 +} 1.536 + 1.537 +bool 1.538 +net_FilterURIString(const char *str, nsACString& result) 1.539 +{ 1.540 + NS_PRECONDITION(str, "Must have a non-null string!"); 1.541 + bool writing = false; 1.542 + result.Truncate(); 1.543 + const char *p = str; 1.544 + 1.545 + // Remove leading spaces, tabs, CR, LF if any. 1.546 + while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') { 1.547 + writing = true; 1.548 + str = p + 1; 1.549 + p++; 1.550 + } 1.551 + 1.552 + // Don't strip from the scheme, because other code assumes everything 1.553 + // up to the ':' is the scheme, and it's bad not to have it match. 1.554 + // If there's no ':', strip. 1.555 + bool found_colon = false; 1.556 + const char *first = nullptr; 1.557 + while (*p) { 1.558 + switch (*p) { 1.559 + case '\t': 1.560 + case '\r': 1.561 + case '\n': 1.562 + if (found_colon) { 1.563 + writing = true; 1.564 + // append chars up to but not including *p 1.565 + if (p > str) 1.566 + result.Append(str, p - str); 1.567 + str = p + 1; 1.568 + } else { 1.569 + // remember where the first \t\r\n was in case we find no scheme 1.570 + if (!first) 1.571 + first = p; 1.572 + } 1.573 + break; 1.574 + 1.575 + case ':': 1.576 + found_colon = true; 1.577 + break; 1.578 + 1.579 + case '/': 1.580 + case '@': 1.581 + if (!found_colon) { 1.582 + // colon also has to precede / or @ to be a scheme 1.583 + found_colon = true; // not really, but means ok to strip 1.584 + if (first) { 1.585 + // go back and replace 1.586 + p = first; 1.587 + continue; // process *p again 1.588 + } 1.589 + } 1.590 + break; 1.591 + 1.592 + default: 1.593 + break; 1.594 + } 1.595 + p++; 1.596 + 1.597 + // At end, if there was no scheme, and we hit a control char, fix 1.598 + // it up now. 1.599 + if (!*p && first != nullptr && !found_colon) { 1.600 + // TRICKY - to avoid duplicating code, we reset the loop back 1.601 + // to the point we found something to do 1.602 + p = first; 1.603 + // This also stops us from looping after we finish 1.604 + found_colon = true; // so we'll replace \t\r\n 1.605 + } 1.606 + } 1.607 + 1.608 + // Remove trailing spaces if any 1.609 + while (((p-1) >= str) && (*(p-1) == ' ')) { 1.610 + writing = true; 1.611 + p--; 1.612 + } 1.613 + 1.614 + if (writing && p > str) 1.615 + result.Append(str, p - str); 1.616 + 1.617 + return writing; 1.618 +} 1.619 + 1.620 +#if defined(XP_WIN) 1.621 +bool 1.622 +net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf) 1.623 +{ 1.624 + bool writing = false; 1.625 + 1.626 + nsACString::const_iterator beginIter, endIter; 1.627 + aURL.BeginReading(beginIter); 1.628 + aURL.EndReading(endIter); 1.629 + 1.630 + const char *s, *begin = beginIter.get(); 1.631 + 1.632 + for (s = begin; s != endIter.get(); ++s) 1.633 + { 1.634 + if (*s == '\\') 1.635 + { 1.636 + writing = true; 1.637 + if (s > begin) 1.638 + aResultBuf.Append(begin, s - begin); 1.639 + aResultBuf += '/'; 1.640 + begin = s + 1; 1.641 + } 1.642 + } 1.643 + if (writing && s > begin) 1.644 + aResultBuf.Append(begin, s - begin); 1.645 + 1.646 + return writing; 1.647 +} 1.648 +#endif 1.649 + 1.650 +//---------------------------------------------------------------------------- 1.651 +// miscellaneous (i.e., stuff that should really be elsewhere) 1.652 +//---------------------------------------------------------------------------- 1.653 + 1.654 +static inline 1.655 +void ToLower(char &c) 1.656 +{ 1.657 + if ((unsigned)(c - 'A') <= (unsigned)('Z' - 'A')) 1.658 + c += 'a' - 'A'; 1.659 +} 1.660 + 1.661 +void 1.662 +net_ToLowerCase(char *str, uint32_t length) 1.663 +{ 1.664 + for (char *end = str + length; str < end; ++str) 1.665 + ToLower(*str); 1.666 +} 1.667 + 1.668 +void 1.669 +net_ToLowerCase(char *str) 1.670 +{ 1.671 + for (; *str; ++str) 1.672 + ToLower(*str); 1.673 +} 1.674 + 1.675 +char * 1.676 +net_FindCharInSet(const char *iter, const char *stop, const char *set) 1.677 +{ 1.678 + for (; iter != stop && *iter; ++iter) { 1.679 + for (const char *s = set; *s; ++s) { 1.680 + if (*iter == *s) 1.681 + return (char *) iter; 1.682 + } 1.683 + } 1.684 + return (char *) iter; 1.685 +} 1.686 + 1.687 +char * 1.688 +net_FindCharNotInSet(const char *iter, const char *stop, const char *set) 1.689 +{ 1.690 +repeat: 1.691 + for (const char *s = set; *s; ++s) { 1.692 + if (*iter == *s) { 1.693 + if (++iter == stop) 1.694 + break; 1.695 + goto repeat; 1.696 + } 1.697 + } 1.698 + return (char *) iter; 1.699 +} 1.700 + 1.701 +char * 1.702 +net_RFindCharNotInSet(const char *stop, const char *iter, const char *set) 1.703 +{ 1.704 + --iter; 1.705 + --stop; 1.706 + 1.707 + if (iter == stop) 1.708 + return (char *) iter; 1.709 + 1.710 +repeat: 1.711 + for (const char *s = set; *s; ++s) { 1.712 + if (*iter == *s) { 1.713 + if (--iter == stop) 1.714 + break; 1.715 + goto repeat; 1.716 + } 1.717 + } 1.718 + return (char *) iter; 1.719 +} 1.720 + 1.721 +#define HTTP_LWS " \t" 1.722 + 1.723 +// Return the index of the closing quote of the string, if any 1.724 +static uint32_t 1.725 +net_FindStringEnd(const nsCString& flatStr, 1.726 + uint32_t stringStart, 1.727 + char stringDelim) 1.728 +{ 1.729 + NS_ASSERTION(stringStart < flatStr.Length() && 1.730 + flatStr.CharAt(stringStart) == stringDelim && 1.731 + (stringDelim == '"' || stringDelim == '\''), 1.732 + "Invalid stringStart"); 1.733 + 1.734 + const char set[] = { stringDelim, '\\', '\0' }; 1.735 + do { 1.736 + // stringStart points to either the start quote or the last 1.737 + // escaped char (the char following a '\\') 1.738 + 1.739 + // Write to searchStart here, so that when we get back to the 1.740 + // top of the loop right outside this one we search from the 1.741 + // right place. 1.742 + uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1); 1.743 + if (stringEnd == uint32_t(kNotFound)) 1.744 + return flatStr.Length(); 1.745 + 1.746 + if (flatStr.CharAt(stringEnd) == '\\') { 1.747 + // Hit a backslash-escaped char. Need to skip over it. 1.748 + stringStart = stringEnd + 1; 1.749 + if (stringStart == flatStr.Length()) 1.750 + return stringStart; 1.751 + 1.752 + // Go back to looking for the next escape or the string end 1.753 + continue; 1.754 + } 1.755 + 1.756 + return stringEnd; 1.757 + 1.758 + } while (true); 1.759 + 1.760 + NS_NOTREACHED("How did we get here?"); 1.761 + return flatStr.Length(); 1.762 +} 1.763 + 1.764 + 1.765 +static uint32_t 1.766 +net_FindMediaDelimiter(const nsCString& flatStr, 1.767 + uint32_t searchStart, 1.768 + char delimiter) 1.769 +{ 1.770 + do { 1.771 + // searchStart points to the spot from which we should start looking 1.772 + // for the delimiter. 1.773 + const char delimStr[] = { delimiter, '"', '\0' }; 1.774 + uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart); 1.775 + if (curDelimPos == uint32_t(kNotFound)) 1.776 + return flatStr.Length(); 1.777 + 1.778 + char ch = flatStr.CharAt(curDelimPos); 1.779 + if (ch == delimiter) { 1.780 + // Found delimiter 1.781 + return curDelimPos; 1.782 + } 1.783 + 1.784 + // We hit the start of a quoted string. Look for its end. 1.785 + searchStart = net_FindStringEnd(flatStr, curDelimPos, ch); 1.786 + if (searchStart == flatStr.Length()) 1.787 + return searchStart; 1.788 + 1.789 + ++searchStart; 1.790 + 1.791 + // searchStart now points to the first char after the end of the 1.792 + // string, so just go back to the top of the loop and look for 1.793 + // |delimiter| again. 1.794 + } while (true); 1.795 + 1.796 + NS_NOTREACHED("How did we get here?"); 1.797 + return flatStr.Length(); 1.798 +} 1.799 + 1.800 +// aOffset should be added to aCharsetStart and aCharsetEnd if this 1.801 +// function sets them. 1.802 +static void 1.803 +net_ParseMediaType(const nsACString &aMediaTypeStr, 1.804 + nsACString &aContentType, 1.805 + nsACString &aContentCharset, 1.806 + int32_t aOffset, 1.807 + bool *aHadCharset, 1.808 + int32_t *aCharsetStart, 1.809 + int32_t *aCharsetEnd) 1.810 +{ 1.811 + const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr); 1.812 + const char* start = flatStr.get(); 1.813 + const char* end = start + flatStr.Length(); 1.814 + 1.815 + // Trim LWS leading and trailing whitespace from type. We include '(' in 1.816 + // the trailing trim set to catch media-type comments, which are not at all 1.817 + // standard, but may occur in rare cases. 1.818 + const char* type = net_FindCharNotInSet(start, end, HTTP_LWS); 1.819 + const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";("); 1.820 + 1.821 + const char* charset = ""; 1.822 + const char* charsetEnd = charset; 1.823 + int32_t charsetParamStart = 0; 1.824 + int32_t charsetParamEnd = 0; 1.825 + 1.826 + // Iterate over parameters 1.827 + bool typeHasCharset = false; 1.828 + uint32_t paramStart = flatStr.FindChar(';', typeEnd - start); 1.829 + if (paramStart != uint32_t(kNotFound)) { 1.830 + // We have parameters. Iterate over them. 1.831 + uint32_t curParamStart = paramStart + 1; 1.832 + do { 1.833 + uint32_t curParamEnd = 1.834 + net_FindMediaDelimiter(flatStr, curParamStart, ';'); 1.835 + 1.836 + const char* paramName = net_FindCharNotInSet(start + curParamStart, 1.837 + start + curParamEnd, 1.838 + HTTP_LWS); 1.839 + static const char charsetStr[] = "charset="; 1.840 + if (PL_strncasecmp(paramName, charsetStr, 1.841 + sizeof(charsetStr) - 1) == 0) { 1.842 + charset = paramName + sizeof(charsetStr) - 1; 1.843 + charsetEnd = start + curParamEnd; 1.844 + typeHasCharset = true; 1.845 + charsetParamStart = curParamStart - 1; 1.846 + charsetParamEnd = curParamEnd; 1.847 + } 1.848 + 1.849 + curParamStart = curParamEnd + 1; 1.850 + } while (curParamStart < flatStr.Length()); 1.851 + } 1.852 + 1.853 + bool charsetNeedsQuotedStringUnescaping = false; 1.854 + if (typeHasCharset) { 1.855 + // Trim LWS leading and trailing whitespace from charset. We include 1.856 + // '(' in the trailing trim set to catch media-type comments, which are 1.857 + // not at all standard, but may occur in rare cases. 1.858 + charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS); 1.859 + if (*charset == '"') { 1.860 + charsetNeedsQuotedStringUnescaping = true; 1.861 + charsetEnd = 1.862 + start + net_FindStringEnd(flatStr, charset - start, *charset); 1.863 + charset++; 1.864 + NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing"); 1.865 + } else { 1.866 + charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";("); 1.867 + } 1.868 + } 1.869 + 1.870 + // if the server sent "*/*", it is meaningless, so do not store it. 1.871 + // also, if type is the same as aContentType, then just update the 1.872 + // charset. however, if charset is empty and aContentType hasn't 1.873 + // changed, then don't wipe-out an existing aContentCharset. We 1.874 + // also want to reject a mime-type if it does not include a slash. 1.875 + // some servers give junk after the charset parameter, which may 1.876 + // include a comma, so this check makes us a bit more tolerant. 1.877 + 1.878 + if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 && 1.879 + memchr(type, '/', typeEnd - type) != nullptr) { 1.880 + // Common case here is that aContentType is empty 1.881 + bool eq = !aContentType.IsEmpty() && 1.882 + aContentType.Equals(Substring(type, typeEnd), 1.883 + nsCaseInsensitiveCStringComparator()); 1.884 + if (!eq) { 1.885 + aContentType.Assign(type, typeEnd - type); 1.886 + ToLowerCase(aContentType); 1.887 + } 1.888 + 1.889 + if ((!eq && *aHadCharset) || typeHasCharset) { 1.890 + *aHadCharset = true; 1.891 + if (charsetNeedsQuotedStringUnescaping) { 1.892 + // parameters using the "quoted-string" syntax need 1.893 + // backslash-escapes to be unescaped (see RFC 2616 Section 2.2) 1.894 + aContentCharset.Truncate(); 1.895 + for (const char *c = charset; c != charsetEnd; c++) { 1.896 + if (*c == '\\' && c + 1 != charsetEnd) { 1.897 + // eat escape 1.898 + c++; 1.899 + } 1.900 + aContentCharset.Append(*c); 1.901 + } 1.902 + } 1.903 + else { 1.904 + aContentCharset.Assign(charset, charsetEnd - charset); 1.905 + } 1.906 + if (typeHasCharset) { 1.907 + *aCharsetStart = charsetParamStart + aOffset; 1.908 + *aCharsetEnd = charsetParamEnd + aOffset; 1.909 + } 1.910 + } 1.911 + // Only set a new charset position if this is a different type 1.912 + // from the last one we had and it doesn't already have a 1.913 + // charset param. If this is the same type, we probably want 1.914 + // to leave the charset position on its first occurrence. 1.915 + if (!eq && !typeHasCharset) { 1.916 + int32_t charsetStart = int32_t(paramStart); 1.917 + if (charsetStart == kNotFound) 1.918 + charsetStart = flatStr.Length(); 1.919 + 1.920 + *aCharsetEnd = *aCharsetStart = charsetStart + aOffset; 1.921 + } 1.922 + } 1.923 +} 1.924 + 1.925 +#undef HTTP_LWS 1.926 + 1.927 +void 1.928 +net_ParseContentType(const nsACString &aHeaderStr, 1.929 + nsACString &aContentType, 1.930 + nsACString &aContentCharset, 1.931 + bool *aHadCharset) 1.932 +{ 1.933 + int32_t dummy1, dummy2; 1.934 + net_ParseContentType(aHeaderStr, aContentType, aContentCharset, 1.935 + aHadCharset, &dummy1, &dummy2); 1.936 +} 1.937 + 1.938 +void 1.939 +net_ParseContentType(const nsACString &aHeaderStr, 1.940 + nsACString &aContentType, 1.941 + nsACString &aContentCharset, 1.942 + bool *aHadCharset, 1.943 + int32_t *aCharsetStart, 1.944 + int32_t *aCharsetEnd) 1.945 +{ 1.946 + // 1.947 + // Augmented BNF (from RFC 2616 section 3.7): 1.948 + // 1.949 + // header-value = media-type *( LWS "," LWS media-type ) 1.950 + // media-type = type "/" subtype *( LWS ";" LWS parameter ) 1.951 + // type = token 1.952 + // subtype = token 1.953 + // parameter = attribute "=" value 1.954 + // attribute = token 1.955 + // value = token | quoted-string 1.956 + // 1.957 + // 1.958 + // Examples: 1.959 + // 1.960 + // text/html 1.961 + // text/html, text/html 1.962 + // text/html,text/html; charset=ISO-8859-1 1.963 + // text/html,text/html; charset="ISO-8859-1" 1.964 + // text/html;charset=ISO-8859-1, text/html 1.965 + // text/html;charset='ISO-8859-1', text/html 1.966 + // application/octet-stream 1.967 + // 1.968 + 1.969 + *aHadCharset = false; 1.970 + const nsCString& flatStr = PromiseFlatCString(aHeaderStr); 1.971 + 1.972 + // iterate over media-types. Note that ',' characters can happen 1.973 + // inside quoted strings, so we need to watch out for that. 1.974 + uint32_t curTypeStart = 0; 1.975 + do { 1.976 + // curTypeStart points to the start of the current media-type. We want 1.977 + // to look for its end. 1.978 + uint32_t curTypeEnd = 1.979 + net_FindMediaDelimiter(flatStr, curTypeStart, ','); 1.980 + 1.981 + // At this point curTypeEnd points to the spot where the media-type 1.982 + // starting at curTypeEnd ends. Time to parse that! 1.983 + net_ParseMediaType(Substring(flatStr, curTypeStart, 1.984 + curTypeEnd - curTypeStart), 1.985 + aContentType, aContentCharset, curTypeStart, 1.986 + aHadCharset, aCharsetStart, aCharsetEnd); 1.987 + 1.988 + // And let's move on to the next media-type 1.989 + curTypeStart = curTypeEnd + 1; 1.990 + } while (curTypeStart < flatStr.Length()); 1.991 +} 1.992 + 1.993 +bool 1.994 +net_IsValidHostName(const nsCSubstring &host) 1.995 +{ 1.996 + const char *end = host.EndReading(); 1.997 + // Use explicit whitelists to select which characters we are 1.998 + // willing to send to lower-level DNS logic. This is more 1.999 + // self-documenting, and can also be slightly faster than the 1.1000 + // blacklist approach, since DNS names are the common case, and 1.1001 + // the commonest characters will tend to be near the start of 1.1002 + // the list. 1.1003 + 1.1004 + // Whitelist for DNS names (RFC 1035) with extra characters added 1.1005 + // for pragmatic reasons "$+_" 1.1006 + // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2 1.1007 + if (net_FindCharNotInSet(host.BeginReading(), end, 1.1008 + "abcdefghijklmnopqrstuvwxyz" 1.1009 + ".-0123456789" 1.1010 + "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end) 1.1011 + return true; 1.1012 + 1.1013 + // Might be a valid IPv6 link-local address containing a percent sign 1.1014 + nsAutoCString strhost(host); 1.1015 + PRNetAddr addr; 1.1016 + return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS; 1.1017 +} 1.1018 + 1.1019 +bool 1.1020 +net_IsValidIPv4Addr(const char *addr, int32_t addrLen) 1.1021 +{ 1.1022 + RangedPtr<const char> p(addr, addrLen); 1.1023 + 1.1024 + int32_t octet = -1; // means no digit yet 1.1025 + int32_t dotCount = 0; // number of dots in the address 1.1026 + 1.1027 + for (; addrLen; ++p, --addrLen) { 1.1028 + if (*p == '.') { 1.1029 + dotCount++; 1.1030 + if (octet == -1) { 1.1031 + // invalid octet 1.1032 + return false; 1.1033 + } 1.1034 + octet = -1; 1.1035 + } else if (*p >= '0' && *p <='9') { 1.1036 + if (octet == 0) { 1.1037 + // leading 0 is not allowed 1.1038 + return false; 1.1039 + } else if (octet == -1) { 1.1040 + octet = *p - '0'; 1.1041 + } else { 1.1042 + octet *= 10; 1.1043 + octet += *p - '0'; 1.1044 + if (octet > 255) 1.1045 + return false; 1.1046 + } 1.1047 + } else { 1.1048 + // invalid character 1.1049 + return false; 1.1050 + } 1.1051 + } 1.1052 + 1.1053 + return (dotCount == 3 && octet != -1); 1.1054 +} 1.1055 + 1.1056 +bool 1.1057 +net_IsValidIPv6Addr(const char *addr, int32_t addrLen) 1.1058 +{ 1.1059 + RangedPtr<const char> p(addr, addrLen); 1.1060 + 1.1061 + int32_t digits = 0; // number of digits in current block 1.1062 + int32_t colons = 0; // number of colons in a row during parsing 1.1063 + int32_t blocks = 0; // number of hexadecimal blocks 1.1064 + bool haveZeros = false; // true if double colon is present in the address 1.1065 + 1.1066 + for (; addrLen; ++p, --addrLen) { 1.1067 + if (*p == ':') { 1.1068 + if (colons == 0) { 1.1069 + if (digits != 0) { 1.1070 + digits = 0; 1.1071 + blocks++; 1.1072 + } 1.1073 + } else if (colons == 1) { 1.1074 + if (haveZeros) 1.1075 + return false; // only one occurrence is allowed 1.1076 + haveZeros = true; 1.1077 + } else { 1.1078 + // too many colons in a row 1.1079 + return false; 1.1080 + } 1.1081 + colons++; 1.1082 + } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || 1.1083 + (*p >= 'A' && *p <= 'F')) { 1.1084 + if (colons == 1 && blocks == 0) // starts with a single colon 1.1085 + return false; 1.1086 + if (digits == 4) // too many digits 1.1087 + return false; 1.1088 + colons = 0; 1.1089 + digits++; 1.1090 + } else if (*p == '.') { 1.1091 + // check valid IPv4 from the beginning of the last block 1.1092 + if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits)) 1.1093 + return false; 1.1094 + return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6); 1.1095 + } else { 1.1096 + // invalid character 1.1097 + return false; 1.1098 + } 1.1099 + } 1.1100 + 1.1101 + if (colons == 1) // ends with a single colon 1.1102 + return false; 1.1103 + 1.1104 + if (digits) // there is a block at the end 1.1105 + blocks++; 1.1106 + 1.1107 + return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8); 1.1108 +}