michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ michael@0: /* vim:set ts=4 sw=4 sts=4 et cindent: */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: #include "mozilla/RangedPtr.h" michael@0: michael@0: #include "nsURLHelper.h" michael@0: #include "nsIFile.h" michael@0: #include "nsIURLParser.h" michael@0: #include "nsCOMPtr.h" michael@0: #include "nsCRT.h" michael@0: #include "nsNetCID.h" michael@0: #include "prnetdb.h" michael@0: michael@0: using namespace mozilla; michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // Init/Shutdown michael@0: //---------------------------------------------------------------------------- michael@0: michael@0: static bool gInitialized = false; michael@0: static nsIURLParser *gNoAuthURLParser = nullptr; michael@0: static nsIURLParser *gAuthURLParser = nullptr; michael@0: static nsIURLParser *gStdURLParser = nullptr; michael@0: michael@0: static void michael@0: InitGlobals() michael@0: { michael@0: nsCOMPtr parser; michael@0: michael@0: parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID); michael@0: NS_ASSERTION(parser, "failed getting 'noauth' url parser"); michael@0: if (parser) { michael@0: gNoAuthURLParser = parser.get(); michael@0: NS_ADDREF(gNoAuthURLParser); michael@0: } michael@0: michael@0: parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID); michael@0: NS_ASSERTION(parser, "failed getting 'auth' url parser"); michael@0: if (parser) { michael@0: gAuthURLParser = parser.get(); michael@0: NS_ADDREF(gAuthURLParser); michael@0: } michael@0: michael@0: parser = do_GetService(NS_STDURLPARSER_CONTRACTID); michael@0: NS_ASSERTION(parser, "failed getting 'std' url parser"); michael@0: if (parser) { michael@0: gStdURLParser = parser.get(); michael@0: NS_ADDREF(gStdURLParser); michael@0: } michael@0: michael@0: gInitialized = true; michael@0: } michael@0: michael@0: void michael@0: net_ShutdownURLHelper() michael@0: { michael@0: if (gInitialized) { michael@0: NS_IF_RELEASE(gNoAuthURLParser); michael@0: NS_IF_RELEASE(gAuthURLParser); michael@0: NS_IF_RELEASE(gStdURLParser); michael@0: gInitialized = false; michael@0: } michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // nsIURLParser getters michael@0: //---------------------------------------------------------------------------- michael@0: michael@0: nsIURLParser * michael@0: net_GetAuthURLParser() michael@0: { michael@0: if (!gInitialized) michael@0: InitGlobals(); michael@0: return gAuthURLParser; michael@0: } michael@0: michael@0: nsIURLParser * michael@0: net_GetNoAuthURLParser() michael@0: { michael@0: if (!gInitialized) michael@0: InitGlobals(); michael@0: return gNoAuthURLParser; michael@0: } michael@0: michael@0: nsIURLParser * michael@0: net_GetStdURLParser() michael@0: { michael@0: if (!gInitialized) michael@0: InitGlobals(); michael@0: return gStdURLParser; michael@0: } michael@0: michael@0: //--------------------------------------------------------------------------- michael@0: // GetFileFromURLSpec implementations michael@0: //--------------------------------------------------------------------------- michael@0: nsresult michael@0: net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result) michael@0: { michael@0: nsAutoCString escPath; michael@0: nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: michael@0: if (escPath.Last() != '/') { michael@0: escPath += '/'; michael@0: } michael@0: michael@0: result = escPath; michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsresult michael@0: net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result) michael@0: { michael@0: nsAutoCString escPath; michael@0: nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: michael@0: // if this file references a directory, then we need to ensure that the michael@0: // URL ends with a slash. this is important since it affects the rules michael@0: // for relative URL resolution when this URL is used as a base URL. michael@0: // if the file does not exist, then we make no assumption about its type, michael@0: // and simply leave the URL unmodified. michael@0: if (escPath.Last() != '/') { michael@0: bool dir; michael@0: rv = aFile->IsDirectory(&dir); michael@0: if (NS_SUCCEEDED(rv) && dir) michael@0: escPath += '/'; michael@0: } michael@0: michael@0: result = escPath; michael@0: return NS_OK; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // file:// URL parsing michael@0: //---------------------------------------------------------------------------- michael@0: michael@0: nsresult michael@0: net_ParseFileURL(const nsACString &inURL, michael@0: nsACString &outDirectory, michael@0: nsACString &outFileBaseName, michael@0: nsACString &outFileExtension) michael@0: { michael@0: nsresult rv; michael@0: michael@0: outDirectory.Truncate(); michael@0: outFileBaseName.Truncate(); michael@0: outFileExtension.Truncate(); michael@0: michael@0: const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL); michael@0: const char *url = flatURL.get(); michael@0: michael@0: uint32_t schemeBeg, schemeEnd; michael@0: rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) { michael@0: NS_ERROR("must be a file:// url"); michael@0: return NS_ERROR_UNEXPECTED; michael@0: } michael@0: michael@0: nsIURLParser *parser = net_GetNoAuthURLParser(); michael@0: NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED); michael@0: michael@0: uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos; michael@0: int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen; michael@0: michael@0: // invoke the parser to extract the URL path michael@0: rv = parser->ParseURL(url, flatURL.Length(), michael@0: nullptr, nullptr, // don't care about scheme michael@0: nullptr, nullptr, // don't care about authority michael@0: &pathPos, &pathLen); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: // invoke the parser to extract filepath from the path michael@0: rv = parser->ParsePath(url + pathPos, pathLen, michael@0: &filepathPos, &filepathLen, michael@0: nullptr, nullptr, // don't care about query michael@0: nullptr, nullptr); // don't care about ref michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: filepathPos += pathPos; michael@0: michael@0: // invoke the parser to extract the directory and filename from filepath michael@0: rv = parser->ParseFilePath(url + filepathPos, filepathLen, michael@0: &directoryPos, &directoryLen, michael@0: &basenamePos, &basenameLen, michael@0: &extensionPos, &extensionLen); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: if (directoryLen > 0) michael@0: outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen); michael@0: if (basenameLen > 0) michael@0: outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen); michael@0: if (extensionLen > 0) michael@0: outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen); michael@0: // since we are using a no-auth url parser, there will never be a host michael@0: // XXX not strictly true... file://localhost/foo/bar.html is a valid URL michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // path manipulation functions michael@0: //---------------------------------------------------------------------------- michael@0: michael@0: // Replace all /./ with a / while resolving URLs michael@0: // But only till #? michael@0: void michael@0: net_CoalesceDirs(netCoalesceFlags flags, char* path) michael@0: { michael@0: /* Stolen from the old netlib's mkparse.c. michael@0: * michael@0: * modifies a url of the form /foo/../foo1 -> /foo1 michael@0: * and /foo/./foo1 -> /foo/foo1 michael@0: * and /foo/foo1/.. -> /foo/ michael@0: */ michael@0: char *fwdPtr = path; michael@0: char *urlPtr = path; michael@0: char *lastslash = path; michael@0: uint32_t traversal = 0; michael@0: uint32_t special_ftp_len = 0; michael@0: michael@0: /* Remember if this url is a special ftp one: */ michael@0: if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) michael@0: { michael@0: /* some schemes (for example ftp) have the speciality that michael@0: the path can begin // or /%2F to mark the root of the michael@0: servers filesystem, a simple / only marks the root relative michael@0: to the user loging in. We remember the length of the marker */ michael@0: if (nsCRT::strncasecmp(path,"/%2F",4) == 0) michael@0: special_ftp_len = 4; michael@0: else if (nsCRT::strncmp(path,"//",2) == 0 ) michael@0: special_ftp_len = 2; michael@0: } michael@0: michael@0: /* find the last slash before # or ? */ michael@0: for(; (*fwdPtr != '\0') && michael@0: (*fwdPtr != '?') && michael@0: (*fwdPtr != '#'); ++fwdPtr) michael@0: { michael@0: } michael@0: michael@0: /* found nothing, but go back one only */ michael@0: /* if there is something to go back to */ michael@0: if (fwdPtr != path && *fwdPtr == '\0') michael@0: { michael@0: --fwdPtr; michael@0: } michael@0: michael@0: /* search the slash */ michael@0: for(; (fwdPtr != path) && michael@0: (*fwdPtr != '/'); --fwdPtr) michael@0: { michael@0: } michael@0: lastslash = fwdPtr; michael@0: fwdPtr = path; michael@0: michael@0: /* replace all %2E or %2e with . in the path */ michael@0: /* but stop at lastchar if non null */ michael@0: for(; (*fwdPtr != '\0') && michael@0: (*fwdPtr != '?') && michael@0: (*fwdPtr != '#') && michael@0: (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr) michael@0: { michael@0: if (*fwdPtr == '%' && *(fwdPtr+1) == '2' && michael@0: (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e')) michael@0: { michael@0: *urlPtr++ = '.'; michael@0: ++fwdPtr; michael@0: ++fwdPtr; michael@0: } michael@0: else michael@0: { michael@0: *urlPtr++ = *fwdPtr; michael@0: } michael@0: } michael@0: // Copy remaining stuff past the #?; michael@0: for (; *fwdPtr != '\0'; ++fwdPtr) michael@0: { michael@0: *urlPtr++ = *fwdPtr; michael@0: } michael@0: *urlPtr = '\0'; // terminate the url michael@0: michael@0: // start again, this time for real michael@0: fwdPtr = path; michael@0: urlPtr = path; michael@0: michael@0: for(; (*fwdPtr != '\0') && michael@0: (*fwdPtr != '?') && michael@0: (*fwdPtr != '#'); ++fwdPtr) michael@0: { michael@0: if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' ) michael@0: { michael@0: // remove . followed by slash michael@0: ++fwdPtr; michael@0: } michael@0: else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' && michael@0: (*(fwdPtr+3) == '/' || michael@0: *(fwdPtr+3) == '\0' || // This will take care of michael@0: *(fwdPtr+3) == '?' || // something like foo/bar/..#sometag michael@0: *(fwdPtr+3) == '#')) michael@0: { michael@0: // remove foo/.. michael@0: // reverse the urlPtr to the previous slash if possible michael@0: // if url does not allow relative root then drop .. above root michael@0: // otherwise retain them in the path michael@0: if(traversal > 0 || !(flags & michael@0: NET_COALESCE_ALLOW_RELATIVE_ROOT)) michael@0: { michael@0: if (urlPtr != path) michael@0: urlPtr--; // we must be going back at least by one michael@0: for(;*urlPtr != '/' && urlPtr != path; urlPtr--) michael@0: ; // null body michael@0: --traversal; // count back michael@0: // forward the fwdPtr past the ../ michael@0: fwdPtr += 2; michael@0: // if we have reached the beginning of the path michael@0: // while searching for the previous / and we remember michael@0: // that it is an url that begins with /%2F then michael@0: // advance urlPtr again by 3 chars because /%2F already michael@0: // marks the root of the path michael@0: if (urlPtr == path && special_ftp_len > 3) michael@0: { michael@0: ++urlPtr; michael@0: ++urlPtr; michael@0: ++urlPtr; michael@0: } michael@0: // special case if we have reached the end michael@0: // to preserve the last / michael@0: if (*fwdPtr == '.' && *(fwdPtr+1) == '\0') michael@0: ++urlPtr; michael@0: } michael@0: else michael@0: { michael@0: // there are to much /.. in this path, just copy them instead. michael@0: // forward the urlPtr past the /.. and copying it michael@0: michael@0: // However if we remember it is an url that starts with michael@0: // /%2F and urlPtr just points at the "F" of "/%2F" then do michael@0: // not overwrite it with the /, just copy .. and move forward michael@0: // urlPtr. michael@0: if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1) michael@0: ++urlPtr; michael@0: else michael@0: *urlPtr++ = *fwdPtr; michael@0: ++fwdPtr; michael@0: *urlPtr++ = *fwdPtr; michael@0: ++fwdPtr; michael@0: *urlPtr++ = *fwdPtr; michael@0: } michael@0: } michael@0: else michael@0: { michael@0: // count the hierachie, but only if we do not have reached michael@0: // the root of some special urls with a special root marker michael@0: if (*fwdPtr == '/' && *(fwdPtr+1) != '.' && michael@0: (special_ftp_len != 2 || *(fwdPtr+1) != '/')) michael@0: traversal++; michael@0: // copy the url incrementaly michael@0: *urlPtr++ = *fwdPtr; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Now lets remove trailing . case michael@0: * /foo/foo1/. -> /foo/foo1/ michael@0: */ michael@0: michael@0: if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/')) michael@0: urlPtr--; michael@0: michael@0: // Copy remaining stuff past the #?; michael@0: for (; *fwdPtr != '\0'; ++fwdPtr) michael@0: { michael@0: *urlPtr++ = *fwdPtr; michael@0: } michael@0: *urlPtr = '\0'; // terminate the url michael@0: } michael@0: michael@0: nsresult michael@0: net_ResolveRelativePath(const nsACString &relativePath, michael@0: const nsACString &basePath, michael@0: nsACString &result) michael@0: { michael@0: nsAutoCString name; michael@0: nsAutoCString path(basePath); michael@0: bool needsDelim = false; michael@0: michael@0: if ( !path.IsEmpty() ) { michael@0: char16_t last = path.Last(); michael@0: needsDelim = !(last == '/'); michael@0: } michael@0: michael@0: nsACString::const_iterator beg, end; michael@0: relativePath.BeginReading(beg); michael@0: relativePath.EndReading(end); michael@0: michael@0: bool stop = false; michael@0: char c; michael@0: for (; !stop; ++beg) { michael@0: c = (beg == end) ? '\0' : *beg; michael@0: //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get()); michael@0: switch (c) { michael@0: case '\0': michael@0: case '#': michael@0: case '?': michael@0: stop = true; michael@0: // fall through... michael@0: case '/': michael@0: // delimiter found michael@0: if (name.EqualsLiteral("..")) { michael@0: // pop path michael@0: // If we already have the delim at end, then michael@0: // skip over that when searching for next one to the left michael@0: int32_t offset = path.Length() - (needsDelim ? 1 : 2); michael@0: // First check for errors michael@0: if (offset < 0 ) michael@0: return NS_ERROR_MALFORMED_URI; michael@0: int32_t pos = path.RFind("/", false, offset); michael@0: if (pos >= 0) michael@0: path.Truncate(pos + 1); michael@0: else michael@0: path.Truncate(); michael@0: } michael@0: else if (name.IsEmpty() || name.EqualsLiteral(".")) { michael@0: // do nothing michael@0: } michael@0: else { michael@0: // append name to path michael@0: if (needsDelim) michael@0: path += '/'; michael@0: path += name; michael@0: needsDelim = true; michael@0: } michael@0: name.Truncate(); michael@0: break; michael@0: michael@0: default: michael@0: // append char to name michael@0: name += c; michael@0: } michael@0: } michael@0: // append anything left on relativePath (e.g. #..., ;..., ?...) michael@0: if (c != '\0') michael@0: path += Substring(--beg, end); michael@0: michael@0: result = path; michael@0: return NS_OK; michael@0: } michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // scheme fu michael@0: //---------------------------------------------------------------------------- michael@0: michael@0: /* Extract URI-Scheme if possible */ michael@0: nsresult michael@0: net_ExtractURLScheme(const nsACString &inURI, michael@0: uint32_t *startPos, michael@0: uint32_t *endPos, michael@0: nsACString *scheme) michael@0: { michael@0: // search for something up to a colon, and call it the scheme michael@0: const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI); michael@0: const char* uri_start = flatURI.get(); michael@0: const char* uri = uri_start; michael@0: michael@0: if (!uri) michael@0: return NS_ERROR_MALFORMED_URI; michael@0: michael@0: // skip leading white space michael@0: while (nsCRT::IsAsciiSpace(*uri)) michael@0: uri++; michael@0: michael@0: uint32_t start = uri - uri_start; michael@0: if (startPos) { michael@0: *startPos = start; michael@0: } michael@0: michael@0: uint32_t length = 0; michael@0: char c; michael@0: while ((c = *uri++) != '\0') { michael@0: // First char must be Alpha michael@0: if (length == 0 && nsCRT::IsAsciiAlpha(c)) { michael@0: length++; michael@0: } michael@0: // Next chars can be alpha + digit + some special chars michael@0: else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || michael@0: nsCRT::IsAsciiDigit(c) || c == '+' || michael@0: c == '.' || c == '-')) { michael@0: length++; michael@0: } michael@0: // stop if colon reached but not as first char michael@0: else if (c == ':' && length > 0) { michael@0: if (endPos) { michael@0: *endPos = start + length; michael@0: } michael@0: michael@0: if (scheme) michael@0: scheme->Assign(Substring(inURI, start, length)); michael@0: return NS_OK; michael@0: } michael@0: else michael@0: break; michael@0: } michael@0: return NS_ERROR_MALFORMED_URI; michael@0: } michael@0: michael@0: bool michael@0: net_IsValidScheme(const char *scheme, uint32_t schemeLen) michael@0: { michael@0: // first char must be alpha michael@0: if (!nsCRT::IsAsciiAlpha(*scheme)) michael@0: return false; michael@0: michael@0: // nsCStrings may have embedded nulls -- reject those too michael@0: for (; schemeLen; ++scheme, --schemeLen) { michael@0: if (!(nsCRT::IsAsciiAlpha(*scheme) || michael@0: nsCRT::IsAsciiDigit(*scheme) || michael@0: *scheme == '+' || michael@0: *scheme == '.' || michael@0: *scheme == '-')) michael@0: return false; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: bool michael@0: net_FilterURIString(const char *str, nsACString& result) michael@0: { michael@0: NS_PRECONDITION(str, "Must have a non-null string!"); michael@0: bool writing = false; michael@0: result.Truncate(); michael@0: const char *p = str; michael@0: michael@0: // Remove leading spaces, tabs, CR, LF if any. michael@0: while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') { michael@0: writing = true; michael@0: str = p + 1; michael@0: p++; michael@0: } michael@0: michael@0: // Don't strip from the scheme, because other code assumes everything michael@0: // up to the ':' is the scheme, and it's bad not to have it match. michael@0: // If there's no ':', strip. michael@0: bool found_colon = false; michael@0: const char *first = nullptr; michael@0: while (*p) { michael@0: switch (*p) { michael@0: case '\t': michael@0: case '\r': michael@0: case '\n': michael@0: if (found_colon) { michael@0: writing = true; michael@0: // append chars up to but not including *p michael@0: if (p > str) michael@0: result.Append(str, p - str); michael@0: str = p + 1; michael@0: } else { michael@0: // remember where the first \t\r\n was in case we find no scheme michael@0: if (!first) michael@0: first = p; michael@0: } michael@0: break; michael@0: michael@0: case ':': michael@0: found_colon = true; michael@0: break; michael@0: michael@0: case '/': michael@0: case '@': michael@0: if (!found_colon) { michael@0: // colon also has to precede / or @ to be a scheme michael@0: found_colon = true; // not really, but means ok to strip michael@0: if (first) { michael@0: // go back and replace michael@0: p = first; michael@0: continue; // process *p again michael@0: } michael@0: } michael@0: break; michael@0: michael@0: default: michael@0: break; michael@0: } michael@0: p++; michael@0: michael@0: // At end, if there was no scheme, and we hit a control char, fix michael@0: // it up now. michael@0: if (!*p && first != nullptr && !found_colon) { michael@0: // TRICKY - to avoid duplicating code, we reset the loop back michael@0: // to the point we found something to do michael@0: p = first; michael@0: // This also stops us from looping after we finish michael@0: found_colon = true; // so we'll replace \t\r\n michael@0: } michael@0: } michael@0: michael@0: // Remove trailing spaces if any michael@0: while (((p-1) >= str) && (*(p-1) == ' ')) { michael@0: writing = true; michael@0: p--; michael@0: } michael@0: michael@0: if (writing && p > str) michael@0: result.Append(str, p - str); michael@0: michael@0: return writing; michael@0: } michael@0: michael@0: #if defined(XP_WIN) michael@0: bool michael@0: net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf) michael@0: { michael@0: bool writing = false; michael@0: michael@0: nsACString::const_iterator beginIter, endIter; michael@0: aURL.BeginReading(beginIter); michael@0: aURL.EndReading(endIter); michael@0: michael@0: const char *s, *begin = beginIter.get(); michael@0: michael@0: for (s = begin; s != endIter.get(); ++s) michael@0: { michael@0: if (*s == '\\') michael@0: { michael@0: writing = true; michael@0: if (s > begin) michael@0: aResultBuf.Append(begin, s - begin); michael@0: aResultBuf += '/'; michael@0: begin = s + 1; michael@0: } michael@0: } michael@0: if (writing && s > begin) michael@0: aResultBuf.Append(begin, s - begin); michael@0: michael@0: return writing; michael@0: } michael@0: #endif michael@0: michael@0: //---------------------------------------------------------------------------- michael@0: // miscellaneous (i.e., stuff that should really be elsewhere) michael@0: //---------------------------------------------------------------------------- michael@0: michael@0: static inline michael@0: void ToLower(char &c) michael@0: { michael@0: if ((unsigned)(c - 'A') <= (unsigned)('Z' - 'A')) michael@0: c += 'a' - 'A'; michael@0: } michael@0: michael@0: void michael@0: net_ToLowerCase(char *str, uint32_t length) michael@0: { michael@0: for (char *end = str + length; str < end; ++str) michael@0: ToLower(*str); michael@0: } michael@0: michael@0: void michael@0: net_ToLowerCase(char *str) michael@0: { michael@0: for (; *str; ++str) michael@0: ToLower(*str); michael@0: } michael@0: michael@0: char * michael@0: net_FindCharInSet(const char *iter, const char *stop, const char *set) michael@0: { michael@0: for (; iter != stop && *iter; ++iter) { michael@0: for (const char *s = set; *s; ++s) { michael@0: if (*iter == *s) michael@0: return (char *) iter; michael@0: } michael@0: } michael@0: return (char *) iter; michael@0: } michael@0: michael@0: char * michael@0: net_FindCharNotInSet(const char *iter, const char *stop, const char *set) michael@0: { michael@0: repeat: michael@0: for (const char *s = set; *s; ++s) { michael@0: if (*iter == *s) { michael@0: if (++iter == stop) michael@0: break; michael@0: goto repeat; michael@0: } michael@0: } michael@0: return (char *) iter; michael@0: } michael@0: michael@0: char * michael@0: net_RFindCharNotInSet(const char *stop, const char *iter, const char *set) michael@0: { michael@0: --iter; michael@0: --stop; michael@0: michael@0: if (iter == stop) michael@0: return (char *) iter; michael@0: michael@0: repeat: michael@0: for (const char *s = set; *s; ++s) { michael@0: if (*iter == *s) { michael@0: if (--iter == stop) michael@0: break; michael@0: goto repeat; michael@0: } michael@0: } michael@0: return (char *) iter; michael@0: } michael@0: michael@0: #define HTTP_LWS " \t" michael@0: michael@0: // Return the index of the closing quote of the string, if any michael@0: static uint32_t michael@0: net_FindStringEnd(const nsCString& flatStr, michael@0: uint32_t stringStart, michael@0: char stringDelim) michael@0: { michael@0: NS_ASSERTION(stringStart < flatStr.Length() && michael@0: flatStr.CharAt(stringStart) == stringDelim && michael@0: (stringDelim == '"' || stringDelim == '\''), michael@0: "Invalid stringStart"); michael@0: michael@0: const char set[] = { stringDelim, '\\', '\0' }; michael@0: do { michael@0: // stringStart points to either the start quote or the last michael@0: // escaped char (the char following a '\\') michael@0: michael@0: // Write to searchStart here, so that when we get back to the michael@0: // top of the loop right outside this one we search from the michael@0: // right place. michael@0: uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1); michael@0: if (stringEnd == uint32_t(kNotFound)) michael@0: return flatStr.Length(); michael@0: michael@0: if (flatStr.CharAt(stringEnd) == '\\') { michael@0: // Hit a backslash-escaped char. Need to skip over it. michael@0: stringStart = stringEnd + 1; michael@0: if (stringStart == flatStr.Length()) michael@0: return stringStart; michael@0: michael@0: // Go back to looking for the next escape or the string end michael@0: continue; michael@0: } michael@0: michael@0: return stringEnd; michael@0: michael@0: } while (true); michael@0: michael@0: NS_NOTREACHED("How did we get here?"); michael@0: return flatStr.Length(); michael@0: } michael@0: michael@0: michael@0: static uint32_t michael@0: net_FindMediaDelimiter(const nsCString& flatStr, michael@0: uint32_t searchStart, michael@0: char delimiter) michael@0: { michael@0: do { michael@0: // searchStart points to the spot from which we should start looking michael@0: // for the delimiter. michael@0: const char delimStr[] = { delimiter, '"', '\0' }; michael@0: uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart); michael@0: if (curDelimPos == uint32_t(kNotFound)) michael@0: return flatStr.Length(); michael@0: michael@0: char ch = flatStr.CharAt(curDelimPos); michael@0: if (ch == delimiter) { michael@0: // Found delimiter michael@0: return curDelimPos; michael@0: } michael@0: michael@0: // We hit the start of a quoted string. Look for its end. michael@0: searchStart = net_FindStringEnd(flatStr, curDelimPos, ch); michael@0: if (searchStart == flatStr.Length()) michael@0: return searchStart; michael@0: michael@0: ++searchStart; michael@0: michael@0: // searchStart now points to the first char after the end of the michael@0: // string, so just go back to the top of the loop and look for michael@0: // |delimiter| again. michael@0: } while (true); michael@0: michael@0: NS_NOTREACHED("How did we get here?"); michael@0: return flatStr.Length(); michael@0: } michael@0: michael@0: // aOffset should be added to aCharsetStart and aCharsetEnd if this michael@0: // function sets them. michael@0: static void michael@0: net_ParseMediaType(const nsACString &aMediaTypeStr, michael@0: nsACString &aContentType, michael@0: nsACString &aContentCharset, michael@0: int32_t aOffset, michael@0: bool *aHadCharset, michael@0: int32_t *aCharsetStart, michael@0: int32_t *aCharsetEnd) michael@0: { michael@0: const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr); michael@0: const char* start = flatStr.get(); michael@0: const char* end = start + flatStr.Length(); michael@0: michael@0: // Trim LWS leading and trailing whitespace from type. We include '(' in michael@0: // the trailing trim set to catch media-type comments, which are not at all michael@0: // standard, but may occur in rare cases. michael@0: const char* type = net_FindCharNotInSet(start, end, HTTP_LWS); michael@0: const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";("); michael@0: michael@0: const char* charset = ""; michael@0: const char* charsetEnd = charset; michael@0: int32_t charsetParamStart = 0; michael@0: int32_t charsetParamEnd = 0; michael@0: michael@0: // Iterate over parameters michael@0: bool typeHasCharset = false; michael@0: uint32_t paramStart = flatStr.FindChar(';', typeEnd - start); michael@0: if (paramStart != uint32_t(kNotFound)) { michael@0: // We have parameters. Iterate over them. michael@0: uint32_t curParamStart = paramStart + 1; michael@0: do { michael@0: uint32_t curParamEnd = michael@0: net_FindMediaDelimiter(flatStr, curParamStart, ';'); michael@0: michael@0: const char* paramName = net_FindCharNotInSet(start + curParamStart, michael@0: start + curParamEnd, michael@0: HTTP_LWS); michael@0: static const char charsetStr[] = "charset="; michael@0: if (PL_strncasecmp(paramName, charsetStr, michael@0: sizeof(charsetStr) - 1) == 0) { michael@0: charset = paramName + sizeof(charsetStr) - 1; michael@0: charsetEnd = start + curParamEnd; michael@0: typeHasCharset = true; michael@0: charsetParamStart = curParamStart - 1; michael@0: charsetParamEnd = curParamEnd; michael@0: } michael@0: michael@0: curParamStart = curParamEnd + 1; michael@0: } while (curParamStart < flatStr.Length()); michael@0: } michael@0: michael@0: bool charsetNeedsQuotedStringUnescaping = false; michael@0: if (typeHasCharset) { michael@0: // Trim LWS leading and trailing whitespace from charset. We include michael@0: // '(' in the trailing trim set to catch media-type comments, which are michael@0: // not at all standard, but may occur in rare cases. michael@0: charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS); michael@0: if (*charset == '"') { michael@0: charsetNeedsQuotedStringUnescaping = true; michael@0: charsetEnd = michael@0: start + net_FindStringEnd(flatStr, charset - start, *charset); michael@0: charset++; michael@0: NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing"); michael@0: } else { michael@0: charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";("); michael@0: } michael@0: } michael@0: michael@0: // if the server sent "*/*", it is meaningless, so do not store it. michael@0: // also, if type is the same as aContentType, then just update the michael@0: // charset. however, if charset is empty and aContentType hasn't michael@0: // changed, then don't wipe-out an existing aContentCharset. We michael@0: // also want to reject a mime-type if it does not include a slash. michael@0: // some servers give junk after the charset parameter, which may michael@0: // include a comma, so this check makes us a bit more tolerant. michael@0: michael@0: if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 && michael@0: memchr(type, '/', typeEnd - type) != nullptr) { michael@0: // Common case here is that aContentType is empty michael@0: bool eq = !aContentType.IsEmpty() && michael@0: aContentType.Equals(Substring(type, typeEnd), michael@0: nsCaseInsensitiveCStringComparator()); michael@0: if (!eq) { michael@0: aContentType.Assign(type, typeEnd - type); michael@0: ToLowerCase(aContentType); michael@0: } michael@0: michael@0: if ((!eq && *aHadCharset) || typeHasCharset) { michael@0: *aHadCharset = true; michael@0: if (charsetNeedsQuotedStringUnescaping) { michael@0: // parameters using the "quoted-string" syntax need michael@0: // backslash-escapes to be unescaped (see RFC 2616 Section 2.2) michael@0: aContentCharset.Truncate(); michael@0: for (const char *c = charset; c != charsetEnd; c++) { michael@0: if (*c == '\\' && c + 1 != charsetEnd) { michael@0: // eat escape michael@0: c++; michael@0: } michael@0: aContentCharset.Append(*c); michael@0: } michael@0: } michael@0: else { michael@0: aContentCharset.Assign(charset, charsetEnd - charset); michael@0: } michael@0: if (typeHasCharset) { michael@0: *aCharsetStart = charsetParamStart + aOffset; michael@0: *aCharsetEnd = charsetParamEnd + aOffset; michael@0: } michael@0: } michael@0: // Only set a new charset position if this is a different type michael@0: // from the last one we had and it doesn't already have a michael@0: // charset param. If this is the same type, we probably want michael@0: // to leave the charset position on its first occurrence. michael@0: if (!eq && !typeHasCharset) { michael@0: int32_t charsetStart = int32_t(paramStart); michael@0: if (charsetStart == kNotFound) michael@0: charsetStart = flatStr.Length(); michael@0: michael@0: *aCharsetEnd = *aCharsetStart = charsetStart + aOffset; michael@0: } michael@0: } michael@0: } michael@0: michael@0: #undef HTTP_LWS michael@0: michael@0: void michael@0: net_ParseContentType(const nsACString &aHeaderStr, michael@0: nsACString &aContentType, michael@0: nsACString &aContentCharset, michael@0: bool *aHadCharset) michael@0: { michael@0: int32_t dummy1, dummy2; michael@0: net_ParseContentType(aHeaderStr, aContentType, aContentCharset, michael@0: aHadCharset, &dummy1, &dummy2); michael@0: } michael@0: michael@0: void michael@0: net_ParseContentType(const nsACString &aHeaderStr, michael@0: nsACString &aContentType, michael@0: nsACString &aContentCharset, michael@0: bool *aHadCharset, michael@0: int32_t *aCharsetStart, michael@0: int32_t *aCharsetEnd) michael@0: { michael@0: // michael@0: // Augmented BNF (from RFC 2616 section 3.7): michael@0: // michael@0: // header-value = media-type *( LWS "," LWS media-type ) michael@0: // media-type = type "/" subtype *( LWS ";" LWS parameter ) michael@0: // type = token michael@0: // subtype = token michael@0: // parameter = attribute "=" value michael@0: // attribute = token michael@0: // value = token | quoted-string michael@0: // michael@0: // michael@0: // Examples: michael@0: // michael@0: // text/html michael@0: // text/html, text/html michael@0: // text/html,text/html; charset=ISO-8859-1 michael@0: // text/html,text/html; charset="ISO-8859-1" michael@0: // text/html;charset=ISO-8859-1, text/html michael@0: // text/html;charset='ISO-8859-1', text/html michael@0: // application/octet-stream michael@0: // michael@0: michael@0: *aHadCharset = false; michael@0: const nsCString& flatStr = PromiseFlatCString(aHeaderStr); michael@0: michael@0: // iterate over media-types. Note that ',' characters can happen michael@0: // inside quoted strings, so we need to watch out for that. michael@0: uint32_t curTypeStart = 0; michael@0: do { michael@0: // curTypeStart points to the start of the current media-type. We want michael@0: // to look for its end. michael@0: uint32_t curTypeEnd = michael@0: net_FindMediaDelimiter(flatStr, curTypeStart, ','); michael@0: michael@0: // At this point curTypeEnd points to the spot where the media-type michael@0: // starting at curTypeEnd ends. Time to parse that! michael@0: net_ParseMediaType(Substring(flatStr, curTypeStart, michael@0: curTypeEnd - curTypeStart), michael@0: aContentType, aContentCharset, curTypeStart, michael@0: aHadCharset, aCharsetStart, aCharsetEnd); michael@0: michael@0: // And let's move on to the next media-type michael@0: curTypeStart = curTypeEnd + 1; michael@0: } while (curTypeStart < flatStr.Length()); michael@0: } michael@0: michael@0: bool michael@0: net_IsValidHostName(const nsCSubstring &host) michael@0: { michael@0: const char *end = host.EndReading(); michael@0: // Use explicit whitelists to select which characters we are michael@0: // willing to send to lower-level DNS logic. This is more michael@0: // self-documenting, and can also be slightly faster than the michael@0: // blacklist approach, since DNS names are the common case, and michael@0: // the commonest characters will tend to be near the start of michael@0: // the list. michael@0: michael@0: // Whitelist for DNS names (RFC 1035) with extra characters added michael@0: // for pragmatic reasons "$+_" michael@0: // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2 michael@0: if (net_FindCharNotInSet(host.BeginReading(), end, michael@0: "abcdefghijklmnopqrstuvwxyz" michael@0: ".-0123456789" michael@0: "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end) michael@0: return true; michael@0: michael@0: // Might be a valid IPv6 link-local address containing a percent sign michael@0: nsAutoCString strhost(host); michael@0: PRNetAddr addr; michael@0: return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS; michael@0: } michael@0: michael@0: bool michael@0: net_IsValidIPv4Addr(const char *addr, int32_t addrLen) michael@0: { michael@0: RangedPtr p(addr, addrLen); michael@0: michael@0: int32_t octet = -1; // means no digit yet michael@0: int32_t dotCount = 0; // number of dots in the address michael@0: michael@0: for (; addrLen; ++p, --addrLen) { michael@0: if (*p == '.') { michael@0: dotCount++; michael@0: if (octet == -1) { michael@0: // invalid octet michael@0: return false; michael@0: } michael@0: octet = -1; michael@0: } else if (*p >= '0' && *p <='9') { michael@0: if (octet == 0) { michael@0: // leading 0 is not allowed michael@0: return false; michael@0: } else if (octet == -1) { michael@0: octet = *p - '0'; michael@0: } else { michael@0: octet *= 10; michael@0: octet += *p - '0'; michael@0: if (octet > 255) michael@0: return false; michael@0: } michael@0: } else { michael@0: // invalid character michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: return (dotCount == 3 && octet != -1); michael@0: } michael@0: michael@0: bool michael@0: net_IsValidIPv6Addr(const char *addr, int32_t addrLen) michael@0: { michael@0: RangedPtr p(addr, addrLen); michael@0: michael@0: int32_t digits = 0; // number of digits in current block michael@0: int32_t colons = 0; // number of colons in a row during parsing michael@0: int32_t blocks = 0; // number of hexadecimal blocks michael@0: bool haveZeros = false; // true if double colon is present in the address michael@0: michael@0: for (; addrLen; ++p, --addrLen) { michael@0: if (*p == ':') { michael@0: if (colons == 0) { michael@0: if (digits != 0) { michael@0: digits = 0; michael@0: blocks++; michael@0: } michael@0: } else if (colons == 1) { michael@0: if (haveZeros) michael@0: return false; // only one occurrence is allowed michael@0: haveZeros = true; michael@0: } else { michael@0: // too many colons in a row michael@0: return false; michael@0: } michael@0: colons++; michael@0: } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') || michael@0: (*p >= 'A' && *p <= 'F')) { michael@0: if (colons == 1 && blocks == 0) // starts with a single colon michael@0: return false; michael@0: if (digits == 4) // too many digits michael@0: return false; michael@0: colons = 0; michael@0: digits++; michael@0: } else if (*p == '.') { michael@0: // check valid IPv4 from the beginning of the last block michael@0: if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits)) michael@0: return false; michael@0: return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6); michael@0: } else { michael@0: // invalid character michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: if (colons == 1) // ends with a single colon michael@0: return false; michael@0: michael@0: if (digits) // there is a block at the end michael@0: blocks++; michael@0: michael@0: return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8); michael@0: }