The Tor Browser: netwerk/base/src/nsURLHelper.cpp@b8a032363ba2

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

     2 /* vim:set ts=4 sw=4 sts=4 et cindent: */

     3 /* This Source Code Form is subject to the terms of the Mozilla Public

     4  * License, v. 2.0. If a copy of the MPL was not distributed with this

     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     7 #include "mozilla/RangedPtr.h"

     9 #include "nsURLHelper.h"

    10 #include "nsIFile.h"

    11 #include "nsIURLParser.h"

    12 #include "nsCOMPtr.h"

    13 #include "nsCRT.h"

    14 #include "nsNetCID.h"

    15 #include "prnetdb.h"

    17 using namespace mozilla;

    19 //----------------------------------------------------------------------------

    20 // Init/Shutdown

    21 //----------------------------------------------------------------------------

    // Module state for the lazily created URL parser singletons.
    // gInitialized is set at the end of InitGlobals() and cleared again by
    // net_ShutdownURLHelper().  Each parser pointer holds a reference taken
    // with NS_ADDREF in InitGlobals() and stays nullptr when the matching
    // service could not be obtained.
    23 static bool gInitialized = false;

    24 static nsIURLParser *gNoAuthURLParser = nullptr;

    25 static nsIURLParser *gAuthURLParser = nullptr;

    26 static nsIURLParser *gStdURLParser = nullptr;

    28 static void

    29 InitGlobals()

    30 {

    31     nsCOMPtr<nsIURLParser> parser;

    33     parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);

    34     NS_ASSERTION(parser, "failed getting 'noauth' url parser");

    35     if (parser) {

    36         gNoAuthURLParser = parser.get();

    37         NS_ADDREF(gNoAuthURLParser);

    38     }

    40     parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);

    41     NS_ASSERTION(parser, "failed getting 'auth' url parser");

    42     if (parser) {

    43         gAuthURLParser = parser.get();

    44         NS_ADDREF(gAuthURLParser);

    45     }

    47     parser = do_GetService(NS_STDURLPARSER_CONTRACTID);

    48     NS_ASSERTION(parser, "failed getting 'std' url parser");

    49     if (parser) {

    50         gStdURLParser = parser.get();

    51         NS_ADDREF(gStdURLParser);

    52     }

    54     gInitialized = true;

    55 }

    57 void

    58 net_ShutdownURLHelper()

    59 {

    60     if (gInitialized) {

    61         NS_IF_RELEASE(gNoAuthURLParser);

    62         NS_IF_RELEASE(gAuthURLParser);

    63         NS_IF_RELEASE(gStdURLParser);

    64         gInitialized = false;

    65     }

    66 }

    68 //----------------------------------------------------------------------------

    69 // nsIURLParser getters

    70 //----------------------------------------------------------------------------

    72 nsIURLParser *

    73 net_GetAuthURLParser()

    74 {

    75     if (!gInitialized)

    76         InitGlobals();

    77     return gAuthURLParser;

    78 }

    80 nsIURLParser *

    81 net_GetNoAuthURLParser()

    82 {

    83     if (!gInitialized)

    84         InitGlobals();

    85     return gNoAuthURLParser;

    86 }

    88 nsIURLParser *

    89 net_GetStdURLParser()

    90 {

    91     if (!gInitialized)

    92         InitGlobals();

    93     return gStdURLParser;

    94 }

    96 //---------------------------------------------------------------------------

    97 // GetURLSpecFromDir / GetURLSpecFromFile implementations

    98 //---------------------------------------------------------------------------

    99 nsresult

   100 net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result)

   101 {

   102     nsAutoCString escPath;

   103     nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);

   104     if (NS_FAILED(rv))

   105         return rv;

   107     if (escPath.Last() != '/') {

   108         escPath += '/';

   109     }

   111     result = escPath;

   112     return NS_OK;

   113 }

   115 nsresult

   116 net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result)

   117 {

   118     nsAutoCString escPath;

   119     nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);

   120     if (NS_FAILED(rv))

   121         return rv;

   123     // if this file references a directory, then we need to ensure that the

   124     // URL ends with a slash.  this is important since it affects the rules

   125     // for relative URL resolution when this URL is used as a base URL.

   126     // if the file does not exist, then we make no assumption about its type,

   127     // and simply leave the URL unmodified.

   128     if (escPath.Last() != '/') {

   129         bool dir;

   130         rv = aFile->IsDirectory(&dir);

   131         if (NS_SUCCEEDED(rv) && dir)

   132             escPath += '/';

   133     }

   135     result = escPath;

   136     return NS_OK;

   137 }

   139 //----------------------------------------------------------------------------

   140 // file:// URL parsing

   141 //----------------------------------------------------------------------------

   143 nsresult

   144 net_ParseFileURL(const nsACString &inURL,

   145                  nsACString &outDirectory,

   146                  nsACString &outFileBaseName,

   147                  nsACString &outFileExtension)

   148 {

   149     nsresult rv;

   151     outDirectory.Truncate();

   152     outFileBaseName.Truncate();

   153     outFileExtension.Truncate();

   155     const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);

   156     const char *url = flatURL.get();

   158     uint32_t schemeBeg, schemeEnd;

   159     rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);

   160     if (NS_FAILED(rv)) return rv;

   162     if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {

   163         NS_ERROR("must be a file:// url");

   164         return NS_ERROR_UNEXPECTED;

   165     }

   167     nsIURLParser *parser = net_GetNoAuthURLParser();

   168     NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);

   170     uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;

   171     int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;

   173     // invoke the parser to extract the URL path

   174     rv = parser->ParseURL(url, flatURL.Length(),

   175                           nullptr, nullptr, // don't care about scheme

   176                           nullptr, nullptr, // don't care about authority

   177                           &pathPos, &pathLen);

   178     if (NS_FAILED(rv)) return rv;

   180     // invoke the parser to extract filepath from the path

   181     rv = parser->ParsePath(url + pathPos, pathLen,

   182                            &filepathPos, &filepathLen,

   183                            nullptr, nullptr,  // don't care about query

   184                            nullptr, nullptr); // don't care about ref

   185     if (NS_FAILED(rv)) return rv;

   187     filepathPos += pathPos;

   189     // invoke the parser to extract the directory and filename from filepath

   190     rv = parser->ParseFilePath(url + filepathPos, filepathLen,

   191                                &directoryPos, &directoryLen,

   192                                &basenamePos, &basenameLen,

   193                                &extensionPos, &extensionLen);

   194     if (NS_FAILED(rv)) return rv;

   196     if (directoryLen > 0)

   197         outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);

   198     if (basenameLen > 0)

   199         outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);

   200     if (extensionLen > 0)

   201         outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);

   202     // since we are using a no-auth url parser, there will never be a host

   203     // XXX not strictly true... file://localhost/foo/bar.html is a valid URL

   205     return NS_OK;

   206 }

   208 //----------------------------------------------------------------------------

   209 // path manipulation functions

   210 //----------------------------------------------------------------------------

   212 // Replace all /./ with a / while resolving URLs

   213 // But only till #?
   //
   // net_CoalesceDirs rewrites |path| in place.
   //
   //   flags - NET_COALESCE_DOUBLE_SLASH_IS_ROOT: a leading "//" or "/%2F"
   //           (the latter matched case-insensitively) is remembered as a
   //           root marker and its length is kept in special_ftp_len.
   //           NET_COALESCE_ALLOW_RELATIVE_ROOT: "/.." segments met while
   //           the counted depth (traversal) is 0 are copied through
   //           instead of being collapsed.
   //   path  - NUL-terminated, writable buffer holding the path.
   //
   // The work happens in two passes over the part of |path| that precedes
   // the first '?' or '#': the first pass turns %2E / %2e into '.', the
   // second collapses "/./" and "/../".  Everything from the first '?' or
   // '#' onwards is copied through unchanged by both passes.

   214 void

   215 net_CoalesceDirs(netCoalesceFlags flags, char* path)

   216 {

   217     /* Stolen from the old netlib's mkparse.c.

   218      *

   219      * modifies a url of the form   /foo/../foo1  ->  /foo1

   220      *                       and    /foo/./foo1   ->  /foo/foo1

   221      *                       and    /foo/foo1/..  ->  /foo/

   222      */

           // fwdPtr is the read cursor, urlPtr the write cursor; both walk
           // the same buffer.
   223     char *fwdPtr = path;

   224     char *urlPtr = path;

   225     char *lastslash = path;

           // Number of path segments counted so far that a ".." may pop.
   226     uint32_t traversal = 0;

           // 0 when there is no root marker, otherwise 2 ("//") or 4 ("/%2F").
   227     uint32_t special_ftp_len = 0;

   229     /* Remember if this url is a special ftp one: */

   230     if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT)

   231     {

   232        /* some schemes (for example ftp) have the speciality that

   233           the path can begin // or /%2F to mark the root of the

   234           servers filesystem, a simple / only marks the root relative

   235           to the user logging in. We remember the length of the marker */

   236         if (nsCRT::strncasecmp(path,"/%2F",4) == 0)

   237             special_ftp_len = 4;

   238         else if (nsCRT::strncmp(path,"//",2) == 0 )

   239             special_ftp_len = 2;

   240     }

   242     /* find the last slash before # or ? */

           // First move fwdPtr to the end of the path portion ...
   243     for(; (*fwdPtr != '\0') &&

   244             (*fwdPtr != '?') &&

   245             (*fwdPtr != '#'); ++fwdPtr)

   246     {

   247     }

   249     /* found nothing, but go back one only */

   250     /* if there is something to go back to */

   251     if (fwdPtr != path && *fwdPtr == '\0')

   252     {

   253         --fwdPtr;

   254     }

   256     /* search the slash */

           // ... then walk backwards until a '/' or the start of the buffer.
   257     for(; (fwdPtr != path) &&

   258             (*fwdPtr != '/'); --fwdPtr)

   259     {

   260     }

   261     lastslash = fwdPtr;

   262     fwdPtr = path;

   264     /* replace all %2E or %2e with . in the path */

   265     /* but stop at lastchar if non null */

           // Pass 1.  The loop ends at lastslash (unless lastslash points at
           // the terminating NUL), so a %2E after the last slash is not
           // decoded here; it is copied verbatim by the loop that follows.
   266     for(; (*fwdPtr != '\0') &&

   267             (*fwdPtr != '?') &&

   268             (*fwdPtr != '#') &&

   269             (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)

   270     {

   271         if (*fwdPtr == '%' && *(fwdPtr+1) == '2' &&

   272             (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))

   273         {

   274             *urlPtr++ = '.';

                   // Skip the two hex digits; the loop increment skips the
                   // third character of the escape.
   275             ++fwdPtr;

   276             ++fwdPtr;

   277         }

   278         else

   279         {

   280             *urlPtr++ = *fwdPtr;

   281         }

   282     }

   283     // Copy remaining stuff past the #?;

   284     for (; *fwdPtr != '\0'; ++fwdPtr)

   285     {

   286         *urlPtr++ = *fwdPtr;

   287     }

   288     *urlPtr = '\0';  // terminate the url

   290     // start again, this time for real

   291     fwdPtr = path;

   292     urlPtr = path;

           // Pass 2: collapse "/./" and "/../" in the path portion.
   294     for(; (*fwdPtr != '\0') &&

   295             (*fwdPtr != '?') &&

   296             (*fwdPtr != '#'); ++fwdPtr)

   297     {

   298         if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )

   299         {

   300             // remove . followed by slash

   301             ++fwdPtr;

   302         }

   303         else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' &&

   304                 (*(fwdPtr+3) == '/' ||

   305                     *(fwdPtr+3) == '\0' || // This will take care of

   306                     *(fwdPtr+3) == '?' ||  // something like foo/bar/..#sometag

   307                     *(fwdPtr+3) == '#'))

   308         {

   309             // remove foo/..

   310             // reverse the urlPtr to the previous slash if possible

   311             // if url does not allow relative root then drop .. above root

   312             // otherwise retain them in the path

   313             if(traversal > 0 || !(flags &

   314                                   NET_COALESCE_ALLOW_RELATIVE_ROOT))

   315             {

   316                 if (urlPtr != path)

   317                     urlPtr--; // we must be going back at least by one

   318                 for(;*urlPtr != '/' && urlPtr != path; urlPtr--)

   319                     ;  // null body

                       // NOTE(review): traversal is unsigned, so this wraps
                       // when it is already 0.  That is only reachable when
                       // NET_COALESCE_ALLOW_RELATIVE_ROOT is not set, in
                       // which case the condition above does not depend on
                       // traversal.
   320                 --traversal; // count back

   321                 // forward the fwdPtr past the ../

   322                 fwdPtr += 2;

   323                 // if we have reached the beginning of the path

   324                 // while searching for the previous / and we remember

   325                 // that it is an url that begins with /%2F then

   326                 // advance urlPtr again by 3 chars because /%2F already

   327                 // marks the root of the path

   328                 if (urlPtr == path && special_ftp_len > 3)

   329                 {

   330                     ++urlPtr;

   331                     ++urlPtr;

   332                     ++urlPtr;

   333                 }

   334                 // special case if we have reached the end

   335                 // to preserve the last /

   336                 if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')

   337                     ++urlPtr;

   338             }

   339             else

   340             {

   341                 // there are too many /.. in this path, just copy them instead.

   342                 // forward the urlPtr past the /.. and copying it

   344                 // However if we remember it is an url that starts with

   345                 // /%2F and urlPtr just points at the "F" of "/%2F" then do

   346                 // not overwrite it with the /, just copy .. and move forward

   347                 // urlPtr.

   348                 if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)

   349                     ++urlPtr;

   350                 else

   351                     *urlPtr++ = *fwdPtr;

   352                 ++fwdPtr;

   353                 *urlPtr++ = *fwdPtr;

   354                 ++fwdPtr;

   355                 *urlPtr++ = *fwdPtr;

   356             }

   357         }

   358         else

   359         {

   360             // count the hierarchy, but only if we do not have reached

   361             // the root of some special urls with a special root marker

   362             if (*fwdPtr == '/' &&  *(fwdPtr+1) != '.' &&

   363                (special_ftp_len != 2 || *(fwdPtr+1) != '/'))

   364                 traversal++;

   365             // copy the url incrementally

   366             *urlPtr++ = *fwdPtr;

   367         }

   368     }

   370     /*

   371      *  Now lets remove trailing . case

   372      *     /foo/foo1/.   ->  /foo/foo1/

   373      */

   375     if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))

   376         urlPtr--;

   378     // Copy remaining stuff past the #?;

   379     for (; *fwdPtr != '\0'; ++fwdPtr)

   380     {

   381         *urlPtr++ = *fwdPtr;

   382     }

   383     *urlPtr = '\0';  // terminate the url

   384 }

   386 nsresult

   387 net_ResolveRelativePath(const nsACString &relativePath,

   388                         const nsACString &basePath,

   389                         nsACString &result)

   390 {

   391     nsAutoCString name;

   392     nsAutoCString path(basePath);

   393     bool needsDelim = false;

   395     if ( !path.IsEmpty() ) {

   396         char16_t last = path.Last();

   397         needsDelim = !(last == '/');

   398     }

   400     nsACString::const_iterator beg, end;

   401     relativePath.BeginReading(beg);

   402     relativePath.EndReading(end);

   404     bool stop = false;

   405     char c;

   406     for (; !stop; ++beg) {

   407         c = (beg == end) ? '\0' : *beg;

   408         //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());

   409         switch (c) {

   410           case '\0':

   411           case '#':

   412           case '?':

   413             stop = true;

   414             // fall through...

   415           case '/':

   416             // delimiter found

   417             if (name.EqualsLiteral("..")) {

   418                 // pop path

   419                 // If we already have the delim at end, then

   420                 //  skip over that when searching for next one to the left

   421                 int32_t offset = path.Length() - (needsDelim ? 1 : 2);

   422                 // First check for errors

   423                 if (offset < 0 )

   424                     return NS_ERROR_MALFORMED_URI;

   425                 int32_t pos = path.RFind("/", false, offset);

   426                 if (pos >= 0)

   427                     path.Truncate(pos + 1);

   428                 else

   429                     path.Truncate();

   430             }

   431             else if (name.IsEmpty() || name.EqualsLiteral(".")) {

   432                 // do nothing

   433             }

   434             else {

   435                 // append name to path

   436                 if (needsDelim)

   437                     path += '/';

   438                 path += name;

   439                 needsDelim = true;

   440             }

   441             name.Truncate();

   442             break;

   444           default:

   445             // append char to name

   446             name += c;

   447         }

   448     }

   449     // append anything left on relativePath (e.g. #..., ;..., ?...)

   450     if (c != '\0')

   451         path += Substring(--beg, end);

   453     result = path;

   454     return NS_OK;

   455 }

   457 //----------------------------------------------------------------------------

   458 // scheme fu

   459 //----------------------------------------------------------------------------

   461 /* Extract URI-Scheme if possible */

   462 nsresult

   463 net_ExtractURLScheme(const nsACString &inURI,

   464                      uint32_t *startPos,

   465                      uint32_t *endPos,

   466                      nsACString *scheme)

   467 {

   468     // search for something up to a colon, and call it the scheme

   469     const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);

   470     const char* uri_start = flatURI.get();

   471     const char* uri = uri_start;

   473     if (!uri)

   474         return NS_ERROR_MALFORMED_URI;

   476     // skip leading white space

   477     while (nsCRT::IsAsciiSpace(*uri))

   478         uri++;

   480     uint32_t start = uri - uri_start;

   481     if (startPos) {

   482         *startPos = start;

   483     }

   485     uint32_t length = 0;

   486     char c;

   487     while ((c = *uri++) != '\0') {

   488         // First char must be Alpha

   489         if (length == 0 && nsCRT::IsAsciiAlpha(c)) {

   490             length++;

   491         }

   492         // Next chars can be alpha + digit + some special chars

   493         else if (length > 0 && (nsCRT::IsAsciiAlpha(c) ||

   494                  nsCRT::IsAsciiDigit(c) || c == '+' ||

   495                  c == '.' || c == '-')) {

   496             length++;

   497         }

   498         // stop if colon reached but not as first char

   499         else if (c == ':' && length > 0) {

   500             if (endPos) {

   501                 *endPos = start + length;

   502             }

   504             if (scheme)

   505                 scheme->Assign(Substring(inURI, start, length));

   506             return NS_OK;

   507         }

   508         else

   509             break;

   510     }

   511     return NS_ERROR_MALFORMED_URI;

   512 }

   514 bool

   515 net_IsValidScheme(const char *scheme, uint32_t schemeLen)

   516 {

   517     // first char must be alpha

   518     if (!nsCRT::IsAsciiAlpha(*scheme))

   519         return false;

   521     // nsCStrings may have embedded nulls -- reject those too

   522     for (; schemeLen; ++scheme, --schemeLen) {

   523         if (!(nsCRT::IsAsciiAlpha(*scheme) ||

   524               nsCRT::IsAsciiDigit(*scheme) ||

   525               *scheme == '+' ||

   526               *scheme == '.' ||

   527               *scheme == '-'))

   528             return false;

   529     }

   531     return true;

   532 }

   // net_FilterURIString strips unwanted whitespace from a URI string.
   //
   //   str    - NUL-terminated input; must not be null.
   //   result - truncated on entry; receives the filtered string only when
   //            something had to be stripped.
   //
   // Stripped are: leading ' ', '\t', '\r' and '\n'; every '\t', '\r' and
   // '\n' that follows the first ':' (or the first '/' or '@' when one of
   // those comes before any ':'); every '\t', '\r' and '\n' in the whole
   // string when none of ':', '/' or '@' occurs; and trailing ' '.
   //
   // Returns true when something was stripped, false when |str| can be used
   // as it is (|result| is then left empty).
   534 bool

   535 net_FilterURIString(const char *str, nsACString& result)

   536 {

   537     NS_PRECONDITION(str, "Must have a non-null string!");

           // |writing| becomes true as soon as anything is stripped; it is
           // also the return value.
   538     bool writing = false;

   539     result.Truncate();

           // |p| is the scan cursor; |str| is moved along to mark the start
           // of the run that has not been appended to |result| yet.
   540     const char *p = str;

   542     // Remove leading spaces, tabs, CR, LF if any.

   543     while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {

   544         writing = true;

   545         str = p + 1;

   546         p++;

   547     }

   549     // Don't strip from the scheme, because other code assumes everything

   550     // up to the ':' is the scheme, and it's bad not to have it match.

   551     // If there's no ':', strip.

   552     bool found_colon = false;

           // First '\t', '\r' or '\n' seen before any ':' was found.
   553     const char *first = nullptr;

   554     while (*p) {

   555         switch (*p) {

   556             case '\t':

   557             case '\r':

   558             case '\n':

   559                 if (found_colon) {

   560                     writing = true;

   561                     // append chars up to but not including *p

   562                     if (p > str)

   563                         result.Append(str, p - str);

   564                     str = p + 1;

   565                 } else {

   566                     // remember where the first \t\r\n was in case we find no scheme

   567                     if (!first)

   568                         first = p;

   569                 }

   570                 break;

   572             case ':':

   573                 found_colon = true;

   574                 break;

   576             case '/':

   577             case '@':

   578                 if (!found_colon) {

   579                     // colon also has to precede / or @ to be a scheme

   580                     found_colon = true; // not really, but means ok to strip

   581                     if (first) {

   582                         // go back and replace

   583                         p = first;

                               // This continues the enclosing while loop, so
                               // the p++ below is skipped for this round.
   584                         continue; // process *p again

   585                     }

   586                 }

   587                 break;

   589             default:

   590                 break;

   591         }

   592         p++;

   594         // At end, if there was no scheme, and we hit a control char, fix

   595         // it up now.

   596         if (!*p && first != nullptr && !found_colon) {

   597             // TRICKY - to avoid duplicating code, we reset the loop back

   598             // to the point we found something to do

   599             p = first;

   600             // This also stops us from looping after we finish

   601             found_colon = true; // so we'll replace \t\r\n

   602         }

   603     }

   605     // Remove trailing spaces if any

   606     while (((p-1) >= str) && (*(p-1) == ' ')) {

   607         writing = true;

   608         p--;

   609     }

           // Flush the last pending run, but only when the string is being
           // rewritten at all.
   611     if (writing && p > str)

   612         result.Append(str, p - str);

   614     return writing;

   615 }

   #if defined(XP_WIN)
   // Replaces every backslash of |aURL| with a forward slash.
   //
   // The converted text is appended to |aResultBuf|, and only when at least
   // one backslash was found; otherwise |aResultBuf| is not touched.
   // Returns true when a backslash was found, false otherwise.
   bool
   net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
   {
       nsACString::const_iterator first, last;
       aURL.BeginReading(first);
       aURL.EndReading(last);

       const char *const limit = last.get();
       // Start of the run that has not been copied to aResultBuf yet.
       const char *runStart = first.get();
       const char *cur = runStart;
       bool changed = false;

       while (cur != limit) {
           if (*cur == '\\') {
               changed = true;
               // Flush what precedes the backslash, then emit the slash.
               if (cur > runStart) {
                   aResultBuf.Append(runStart, cur - runStart);
               }
               aResultBuf += '/';
               runStart = cur + 1;
           }
           ++cur;
       }

       // Flush the tail, but only if the buffer is being written at all.
       if (changed && cur > runStart) {
           aResultBuf.Append(runStart, cur - runStart);
       }

       return changed;
   }
   #endif

   647 //----------------------------------------------------------------------------

   648 // miscellaneous (i.e., stuff that should really be elsewhere)

   649 //----------------------------------------------------------------------------

   // Converts |c| to lower case in place when it is an ASCII upper-case
   // letter ('A'..'Z'); every other value is left unchanged.
   static inline
   void ToLower(char &c)
   {
       const bool isUpper = (c >= 'A' && c <= 'Z');
       if (isUpper) {
           c += 'a' - 'A';
       }
   }

   658 void

   659 net_ToLowerCase(char *str, uint32_t length)

   660 {

   661     for (char *end = str + length; str < end; ++str)

   662         ToLower(*str);

   663 }

   665 void

   666 net_ToLowerCase(char *str)

   667 {

   668     for (; *str; ++str)

   669         ToLower(*str);

   670 }

   // Scans [iter, stop) for the first character that occurs in the
   // NUL-terminated |set|.  The scan also ends at a NUL character.
   // Returns a pointer to the match, or to the position where the scan
   // ended (|stop| or the NUL) when there is none.
   char *
   net_FindCharInSet(const char *iter, const char *stop, const char *set)
   {
       while (iter != stop && *iter != '\0') {
           // Walk the set until it is exhausted or the character is found.
           const char *candidate = set;
           while (*candidate != '\0' && *candidate != *iter) {
               ++candidate;
           }
           if (*candidate != '\0') {
               return (char *) iter;
           }
           ++iter;
       }
       return (char *) iter;
   }

   // Scans [iter, stop) for the first character that does NOT occur in the
   // NUL-terminated |set|.  A NUL character in the range is never part of
   // the set, so the scan ends there as well.
   // Returns a pointer to that character, or |stop| when every character of
   // the range is in the set.  An empty range (iter == stop) yields |stop|
   // without reading from it.
   char *
   net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
   {
       // Testing iter against stop before every read keeps the scan inside
       // the range, including when the range is empty on entry.
       for (; iter != stop; ++iter) {
           bool inSet = false;
           for (const char *s = set; *s; ++s) {
               if (*iter == *s) {
                   inSet = true;
                   break;
               }
           }
           if (!inSet)
               break;
       }
       return (char *) iter;
   }

   // Scans [stop, iter) backwards, starting at the character just before
   // |iter|, for the last character that does NOT occur in the
   // NUL-terminated |set|.
   // Returns a pointer to that character, or |stop| - 1 when the range is
   // empty or every character of it is in the set.
   char *
   net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
   {
       const char *cur = iter - 1;       // last character of the range
       const char *const limit = stop - 1; // one before the first character

       while (cur != limit) {
           bool inSet = false;
           for (const char *member = set; *member != '\0'; ++member) {
               if (*cur == *member) {
                   inSet = true;
                   break;
               }
           }
           if (!inSet) {
               break;
           }
           --cur;
       }
       return (char *) cur;
   }

   // Characters treated as linear whitespace by the media-type parsing
   // code that follows: space and horizontal tab.
   718 #define HTTP_LWS " \t"

   720 // Return the index of the closing quote of the string, if any

   721 static uint32_t

   722 net_FindStringEnd(const nsCString& flatStr,

   723                   uint32_t stringStart,

   724                   char stringDelim)

   725 {

   726     NS_ASSERTION(stringStart < flatStr.Length() &&

   727                  flatStr.CharAt(stringStart) == stringDelim &&

   728                  (stringDelim == '"' || stringDelim == '\''),

   729                  "Invalid stringStart");

   731     const char set[] = { stringDelim, '\\', '\0' };

   732     do {

   733         // stringStart points to either the start quote or the last

   734         // escaped char (the char following a '\\')

   736         // Write to searchStart here, so that when we get back to the

   737         // top of the loop right outside this one we search from the

   738         // right place.

   739         uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);

   740         if (stringEnd == uint32_t(kNotFound))

   741             return flatStr.Length();

   743         if (flatStr.CharAt(stringEnd) == '\\') {

   744             // Hit a backslash-escaped char.  Need to skip over it.

   745             stringStart = stringEnd + 1;

   746             if (stringStart == flatStr.Length())

   747                 return stringStart;

   749             // Go back to looking for the next escape or the string end

   750             continue;

   751         }

   753         return stringEnd;

   755     } while (true);

   757     NS_NOTREACHED("How did we get here?");

   758     return flatStr.Length();

   759 }

   762 static uint32_t

   763 net_FindMediaDelimiter(const nsCString& flatStr,

   764                        uint32_t searchStart,

   765                        char delimiter)

   766 {

   767     do {

   768         // searchStart points to the spot from which we should start looking

   769         // for the delimiter.

   770         const char delimStr[] = { delimiter, '"', '\0' };

   771         uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);

   772         if (curDelimPos == uint32_t(kNotFound))

   773             return flatStr.Length();

   775         char ch = flatStr.CharAt(curDelimPos);

   776         if (ch == delimiter) {

   777             // Found delimiter

   778             return curDelimPos;

   779         }

   781         // We hit the start of a quoted string.  Look for its end.

   782         searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);

   783         if (searchStart == flatStr.Length())

   784             return searchStart;

   786         ++searchStart;

   788         // searchStart now points to the first char after the end of the

   789         // string, so just go back to the top of the loop and look for

   790         // |delimiter| again.

   791     } while (true);

   793     NS_NOTREACHED("How did we get here?");

   794     return flatStr.Length();

   795 }

   797 // aOffset should be added to aCharsetStart and aCharsetEnd if this

   798 // function sets them.

   799 static void

   800 net_ParseMediaType(const nsACString &aMediaTypeStr,

   801                    nsACString       &aContentType,

   802                    nsACString       &aContentCharset,

   803                    int32_t          aOffset,

   804                    bool             *aHadCharset,

   805                    int32_t          *aCharsetStart,

   806                    int32_t          *aCharsetEnd)

   807 {

   808     const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);

   809     const char* start = flatStr.get();

   810     const char* end = start + flatStr.Length();

   812     // Trim LWS leading and trailing whitespace from type.  We include '(' in

   813     // the trailing trim set to catch media-type comments, which are not at all

   814     // standard, but may occur in rare cases.

   815     const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);

   816     const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");

   818     const char* charset = "";

   819     const char* charsetEnd = charset;

   820     int32_t charsetParamStart = 0;

   821     int32_t charsetParamEnd = 0;

   823     // Iterate over parameters

   824     bool typeHasCharset = false;

   825     uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);

   826     if (paramStart != uint32_t(kNotFound)) {

   827         // We have parameters.  Iterate over them.

   828         uint32_t curParamStart = paramStart + 1;

   829         do {

   830             uint32_t curParamEnd =

   831                 net_FindMediaDelimiter(flatStr, curParamStart, ';');

   833             const char* paramName = net_FindCharNotInSet(start + curParamStart,

   834                                                          start + curParamEnd,

   835                                                          HTTP_LWS);

   836             static const char charsetStr[] = "charset=";

   837             if (PL_strncasecmp(paramName, charsetStr,

   838                                sizeof(charsetStr) - 1) == 0) {

   839                 charset = paramName + sizeof(charsetStr) - 1;

   840                 charsetEnd = start + curParamEnd;

   841                 typeHasCharset = true;

   842                 charsetParamStart = curParamStart - 1;

   843                 charsetParamEnd = curParamEnd;

   844             }

   846             curParamStart = curParamEnd + 1;

   847         } while (curParamStart < flatStr.Length());

   848     }

   850     bool charsetNeedsQuotedStringUnescaping = false;

   851     if (typeHasCharset) {

   852         // Trim LWS leading and trailing whitespace from charset.  We include

   853         // '(' in the trailing trim set to catch media-type comments, which are

   854         // not at all standard, but may occur in rare cases.

   855         charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);

   856         if (*charset == '"') {

   857             charsetNeedsQuotedStringUnescaping = true;

   858             charsetEnd =

   859                 start + net_FindStringEnd(flatStr, charset - start, *charset);

   860             charset++;

   861             NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");

   862         } else {

   863             charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");

   864         }

   865     }

   867     // if the server sent "*/*", it is meaningless, so do not store it.

   868     // also, if type is the same as aContentType, then just update the

   869     // charset.  however, if charset is empty and aContentType hasn't

   870     // changed, then don't wipe-out an existing aContentCharset.  We

   871     // also want to reject a mime-type if it does not include a slash.

   872     // some servers give junk after the charset parameter, which may

   873     // include a comma, so this check makes us a bit more tolerant.

   875     if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&

   876         memchr(type, '/', typeEnd - type) != nullptr) {

   877         // Common case here is that aContentType is empty

   878         bool eq = !aContentType.IsEmpty() &&

   879             aContentType.Equals(Substring(type, typeEnd),

   880                                 nsCaseInsensitiveCStringComparator());

   881         if (!eq) {

   882             aContentType.Assign(type, typeEnd - type);

   883             ToLowerCase(aContentType);

   884         }

   886         if ((!eq && *aHadCharset) || typeHasCharset) {

   887             *aHadCharset = true;

   888             if (charsetNeedsQuotedStringUnescaping) {

   889                 // parameters using the "quoted-string" syntax need

   890                 // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)

   891                 aContentCharset.Truncate();

   892                 for (const char *c = charset; c != charsetEnd; c++) {

   893                     if (*c == '\\' && c + 1 != charsetEnd) {

   894                         // eat escape

   895                         c++;

   896                     }

   897                     aContentCharset.Append(*c);

   898                 }

   899             }

   900             else {

   901                 aContentCharset.Assign(charset, charsetEnd - charset);

   902             }

   903             if (typeHasCharset) {

   904                 *aCharsetStart = charsetParamStart + aOffset;

   905                 *aCharsetEnd = charsetParamEnd + aOffset;

   906             }

   907         }

   908         // Only set a new charset position if this is a different type

   909         // from the last one we had and it doesn't already have a

   910         // charset param.  If this is the same type, we probably want

   911         // to leave the charset position on its first occurrence.

   912         if (!eq && !typeHasCharset) {

   913             int32_t charsetStart = int32_t(paramStart);

   914             if (charsetStart == kNotFound)

   915                 charsetStart =  flatStr.Length();

   917             *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;

   918         }

   919     }

   920 }

   922 #undef HTTP_LWS

   924 void

   925 net_ParseContentType(const nsACString &aHeaderStr,

   926                      nsACString       &aContentType,

   927                      nsACString       &aContentCharset,

   928                      bool             *aHadCharset)

   929 {

   930     int32_t dummy1, dummy2;

   931     net_ParseContentType(aHeaderStr, aContentType, aContentCharset,

   932                          aHadCharset, &dummy1, &dummy2);

   933 }

   935 void

   936 net_ParseContentType(const nsACString &aHeaderStr,

   937                      nsACString       &aContentType,

   938                      nsACString       &aContentCharset,

   939                      bool             *aHadCharset,

   940                      int32_t          *aCharsetStart,

   941                      int32_t          *aCharsetEnd)

   942 {

   943     //

   944     // Augmented BNF (from RFC 2616 section 3.7):

   945     //

   946     //   header-value = media-type *( LWS "," LWS media-type )

   947     //   media-type   = type "/" subtype *( LWS ";" LWS parameter )

   948     //   type         = token

   949     //   subtype      = token

   950     //   parameter    = attribute "=" value

   951     //   attribute    = token

   952     //   value        = token | quoted-string

   953     //

   954     //

   955     // Examples:

   956     //

   957     //   text/html

   958     //   text/html, text/html

   959     //   text/html,text/html; charset=ISO-8859-1

   960     //   text/html,text/html; charset="ISO-8859-1"

   961     //   text/html;charset=ISO-8859-1, text/html

   962     //   text/html;charset='ISO-8859-1', text/html

   963     //   application/octet-stream

   964     //

   966     *aHadCharset = false;

   967     const nsCString& flatStr = PromiseFlatCString(aHeaderStr);

   969     // iterate over media-types.  Note that ',' characters can happen

   970     // inside quoted strings, so we need to watch out for that.

   971     uint32_t curTypeStart = 0;

   972     do {

   973         // curTypeStart points to the start of the current media-type.  We want

   974         // to look for its end.

   975         uint32_t curTypeEnd =

   976             net_FindMediaDelimiter(flatStr, curTypeStart, ',');

   978         // At this point curTypeEnd points to the spot where the media-type

   979         // starting at curTypeEnd ends.  Time to parse that!

   980         net_ParseMediaType(Substring(flatStr, curTypeStart,

   981                                      curTypeEnd - curTypeStart),

   982                            aContentType, aContentCharset, curTypeStart,

   983                            aHadCharset, aCharsetStart, aCharsetEnd);

   985         // And let's move on to the next media-type

   986         curTypeStart = curTypeEnd + 1;

   987     } while (curTypeStart < flatStr.Length());

   988 }

   990 bool

   991 net_IsValidHostName(const nsCSubstring &host)

   992 {

   993     const char *end = host.EndReading();

   994     // Use explicit whitelists to select which characters we are

   995     // willing to send to lower-level DNS logic. This is more

   996     // self-documenting, and can also be slightly faster than the

   997     // blacklist approach, since DNS names are the common case, and

   998     // the commonest characters will tend to be near the start of

   999     // the list.

  1001     // Whitelist for DNS names (RFC 1035) with extra characters added

  1002     // for pragmatic reasons "$+_"

  1003     // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2

  1004     if (net_FindCharNotInSet(host.BeginReading(), end,

  1005                              "abcdefghijklmnopqrstuvwxyz"

  1006                              ".-0123456789"

  1007                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)

  1008         return true;

  1010     // Might be a valid IPv6 link-local address containing a percent sign

  1011     nsAutoCString strhost(host);

  1012     PRNetAddr addr;

  1013     return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;

  1014 }

  1016 bool

  1017 net_IsValidIPv4Addr(const char *addr, int32_t addrLen)

  1018 {

  1019     RangedPtr<const char> p(addr, addrLen);

  1021     int32_t octet = -1;   // means no digit yet

  1022     int32_t dotCount = 0; // number of dots in the address

  1024     for (; addrLen; ++p, --addrLen) {

  1025         if (*p == '.') {

  1026             dotCount++;

  1027             if (octet == -1) {

  1028                 // invalid octet

  1029                 return false;

  1030             }

  1031             octet = -1;

  1032         } else if (*p >= '0' && *p <='9') {

  1033             if (octet == 0) {

  1034                 // leading 0 is not allowed

  1035                 return false;

  1036             } else if (octet == -1) {

  1037                 octet = *p - '0';

  1038             } else {

  1039                 octet *= 10;

  1040                 octet += *p - '0';

  1041                 if (octet > 255)

  1042                     return false;

  1043             }

  1044         } else {

  1045             // invalid character

  1046             return false;

  1047         }

  1048     }

  1050     return (dotCount == 3 && octet != -1);

  1051 }

  1053 bool

  1054 net_IsValidIPv6Addr(const char *addr, int32_t addrLen)

  1055 {

  1056     RangedPtr<const char> p(addr, addrLen);

  1058     int32_t digits = 0; // number of digits in current block

  1059     int32_t colons = 0; // number of colons in a row during parsing

  1060     int32_t blocks = 0; // number of hexadecimal blocks

  1061     bool haveZeros = false; // true if double colon is present in the address

  1063     for (; addrLen; ++p, --addrLen) {

  1064         if (*p == ':') {

  1065             if (colons == 0) {

  1066                 if (digits != 0) {

  1067                     digits = 0;

  1068                     blocks++;

  1069                 }

  1070             } else if (colons == 1) {

  1071                 if (haveZeros)

  1072                     return false; // only one occurrence is allowed

  1073                 haveZeros = true;

  1074             } else {

  1075                 // too many colons in a row

  1076                 return false;

  1077             }

  1078             colons++;

  1079         } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') ||

  1080                    (*p >= 'A' && *p <= 'F')) {

  1081             if (colons == 1 && blocks == 0) // starts with a single colon

  1082                 return false;

  1083             if (digits == 4) // too many digits

  1084                 return false;

  1085             colons = 0;

  1086             digits++;

  1087         } else if (*p == '.') {

  1088             // check valid IPv4 from the beginning of the last block

  1089             if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits))

  1090                 return false;

  1091             return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6);

  1092         } else {

  1093             // invalid character

  1094             return false;

  1095         }

  1096     }

  1098     if (colons == 1) // ends with a single colon

  1099         return false;

  1101     if (digits) // there is a block at the end

  1102         blocks++;

  1104     return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8);

  1105 }

The Tor Browser / file revision

netwerk/base/src/nsURLHelper.cpp@b8a032363ba2

netwerk/base/src/nsURLHelper.cpp