netwerk/base/src/nsURLHelper.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     2 /* vim:set ts=4 sw=4 sts=4 et cindent: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 #include "mozilla/RangedPtr.h"
     9 #include "nsURLHelper.h"
    10 #include "nsIFile.h"
    11 #include "nsIURLParser.h"
    12 #include "nsCOMPtr.h"
    13 #include "nsCRT.h"
    14 #include "nsNetCID.h"
    15 #include "prnetdb.h"
    17 using namespace mozilla;
    19 //----------------------------------------------------------------------------
    20 // Init/Shutdown
    21 //----------------------------------------------------------------------------
    // True once InitGlobals() has run; reset to false by net_ShutdownURLHelper().
    23 static bool gInitialized = false;
       // Cached URL parser services.  InitGlobals() stores an AddRef'd pointer in
       // each one when the corresponding service lookup succeeds, otherwise the
       // pointer stays null.  net_ShutdownURLHelper() releases them.
    24 static nsIURLParser *gNoAuthURLParser = nullptr;
    25 static nsIURLParser *gAuthURLParser = nullptr;
    26 static nsIURLParser *gStdURLParser = nullptr;
    28 static void
    29 InitGlobals()
    30 {
    31     nsCOMPtr<nsIURLParser> parser;
    33     parser = do_GetService(NS_NOAUTHURLPARSER_CONTRACTID);
    34     NS_ASSERTION(parser, "failed getting 'noauth' url parser");
    35     if (parser) {
    36         gNoAuthURLParser = parser.get();
    37         NS_ADDREF(gNoAuthURLParser);
    38     }
    40     parser = do_GetService(NS_AUTHURLPARSER_CONTRACTID);
    41     NS_ASSERTION(parser, "failed getting 'auth' url parser");
    42     if (parser) {
    43         gAuthURLParser = parser.get();
    44         NS_ADDREF(gAuthURLParser);
    45     }
    47     parser = do_GetService(NS_STDURLPARSER_CONTRACTID);
    48     NS_ASSERTION(parser, "failed getting 'std' url parser");
    49     if (parser) {
    50         gStdURLParser = parser.get();
    51         NS_ADDREF(gStdURLParser);
    52     }
    54     gInitialized = true;
    55 }
    57 void
    58 net_ShutdownURLHelper()
    59 {
    60     if (gInitialized) {
    61         NS_IF_RELEASE(gNoAuthURLParser);
    62         NS_IF_RELEASE(gAuthURLParser);
    63         NS_IF_RELEASE(gStdURLParser);
    64         gInitialized = false;
    65     }
    66 }
    68 //----------------------------------------------------------------------------
    69 // nsIURLParser getters
    70 //----------------------------------------------------------------------------
    72 nsIURLParser *
    73 net_GetAuthURLParser()
    74 {
    75     if (!gInitialized)
    76         InitGlobals();
    77     return gAuthURLParser;
    78 }
    80 nsIURLParser *
    81 net_GetNoAuthURLParser()
    82 {
    83     if (!gInitialized)
    84         InitGlobals();
    85     return gNoAuthURLParser;
    86 }
    88 nsIURLParser *
    89 net_GetStdURLParser()
    90 {
    91     if (!gInitialized)
    92         InitGlobals();
    93     return gStdURLParser;
    94 }
    96 //---------------------------------------------------------------------------
    97 // GetFileFromURLSpec implementations
    98 //---------------------------------------------------------------------------
    99 nsresult
   100 net_GetURLSpecFromDir(nsIFile *aFile, nsACString &result)
   101 {
   102     nsAutoCString escPath;
   103     nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
   104     if (NS_FAILED(rv))
   105         return rv;
   107     if (escPath.Last() != '/') {
   108         escPath += '/';
   109     }
   111     result = escPath;
   112     return NS_OK;
   113 }
   115 nsresult
   116 net_GetURLSpecFromFile(nsIFile *aFile, nsACString &result)
   117 {
   118     nsAutoCString escPath;
   119     nsresult rv = net_GetURLSpecFromActualFile(aFile, escPath);
   120     if (NS_FAILED(rv))
   121         return rv;
   123     // if this file references a directory, then we need to ensure that the
   124     // URL ends with a slash.  this is important since it affects the rules
   125     // for relative URL resolution when this URL is used as a base URL.
   126     // if the file does not exist, then we make no assumption about its type,
   127     // and simply leave the URL unmodified.
   128     if (escPath.Last() != '/') {
   129         bool dir;
   130         rv = aFile->IsDirectory(&dir);
   131         if (NS_SUCCEEDED(rv) && dir)
   132             escPath += '/';
   133     }
   135     result = escPath;
   136     return NS_OK;
   137 }
   139 //----------------------------------------------------------------------------
   140 // file:// URL parsing
   141 //----------------------------------------------------------------------------
   143 nsresult
   144 net_ParseFileURL(const nsACString &inURL,
   145                  nsACString &outDirectory,
   146                  nsACString &outFileBaseName,
   147                  nsACString &outFileExtension)
   148 {
   149     nsresult rv;
   151     outDirectory.Truncate();
   152     outFileBaseName.Truncate();
   153     outFileExtension.Truncate();
   155     const nsPromiseFlatCString &flatURL = PromiseFlatCString(inURL);
   156     const char *url = flatURL.get();
   158     uint32_t schemeBeg, schemeEnd;
   159     rv = net_ExtractURLScheme(flatURL, &schemeBeg, &schemeEnd, nullptr);
   160     if (NS_FAILED(rv)) return rv;
   162     if (strncmp(url + schemeBeg, "file", schemeEnd - schemeBeg) != 0) {
   163         NS_ERROR("must be a file:// url");
   164         return NS_ERROR_UNEXPECTED;
   165     }
   167     nsIURLParser *parser = net_GetNoAuthURLParser();
   168     NS_ENSURE_TRUE(parser, NS_ERROR_UNEXPECTED);
   170     uint32_t pathPos, filepathPos, directoryPos, basenamePos, extensionPos;
   171     int32_t pathLen, filepathLen, directoryLen, basenameLen, extensionLen;
   173     // invoke the parser to extract the URL path
   174     rv = parser->ParseURL(url, flatURL.Length(),
   175                           nullptr, nullptr, // don't care about scheme
   176                           nullptr, nullptr, // don't care about authority
   177                           &pathPos, &pathLen);
   178     if (NS_FAILED(rv)) return rv;
   180     // invoke the parser to extract filepath from the path
   181     rv = parser->ParsePath(url + pathPos, pathLen,
   182                            &filepathPos, &filepathLen,
   183                            nullptr, nullptr,  // don't care about query
   184                            nullptr, nullptr); // don't care about ref
   185     if (NS_FAILED(rv)) return rv;
   187     filepathPos += pathPos;
   189     // invoke the parser to extract the directory and filename from filepath
   190     rv = parser->ParseFilePath(url + filepathPos, filepathLen,
   191                                &directoryPos, &directoryLen,
   192                                &basenamePos, &basenameLen,
   193                                &extensionPos, &extensionLen);
   194     if (NS_FAILED(rv)) return rv;
   196     if (directoryLen > 0)
   197         outDirectory = Substring(inURL, filepathPos + directoryPos, directoryLen);
   198     if (basenameLen > 0)
   199         outFileBaseName = Substring(inURL, filepathPos + basenamePos, basenameLen);
   200     if (extensionLen > 0)
   201         outFileExtension = Substring(inURL, filepathPos + extensionPos, extensionLen);
   202     // since we are using a no-auth url parser, there will never be a host
   203     // XXX not strictly true... file://localhost/foo/bar.html is a valid URL
   205     return NS_OK;
   206 }
   208 //----------------------------------------------------------------------------
   209 // path manipulation functions
   210 //----------------------------------------------------------------------------
   212 // Rewrites |path| in place, collapsing "/./" and "/../" segments while
   213 // resolving URLs.  Only the part before the first '?' or '#' is touched;
       // the remainder is copied through unchanged.
       //
       // flags: NET_COALESCE_DOUBLE_SLASH_IS_ROOT makes a leading "//" or "/%2F"
       //        be remembered as a root marker; NET_COALESCE_ALLOW_RELATIVE_ROOT
       //        keeps ".." segments that would climb above the root instead of
       //        dropping them.
       // path:  NUL-terminated, writable buffer holding the path to normalize.
   214 void 
   215 net_CoalesceDirs(netCoalesceFlags flags, char* path)
   216 {
   217     /* Stolen from the old netlib's mkparse.c.
   218      *
   219      * modifies a url of the form   /foo/../foo1  ->  /foo1
   220      *                       and    /foo/./foo1   ->  /foo/foo1
   221      *                       and    /foo/foo1/..  ->  /foo/
   222      */
           // fwdPtr is the read cursor, urlPtr the write cursor; both walk the
           // same buffer.
   223     char *fwdPtr = path;
   224     char *urlPtr = path;
   225     char *lastslash = path;
           // Number of directory levels copied so far that a ".." may pop.
   226     uint32_t traversal = 0;
           // Length of the root marker at the start of |path| (4 for "/%2F",
           // 2 for "//"), or 0 when there is none or the flag is not set.
   227     uint32_t special_ftp_len = 0;
   229     /* Remember if this url is a special ftp one: */
   230     if (flags & NET_COALESCE_DOUBLE_SLASH_IS_ROOT) 
   231     {
   232        /* some schemes (for example ftp) have the speciality that 
   233           the path can begin // or /%2F to mark the root of the 
   234           servers filesystem, a simple / only marks the root relative 
   235           to the user logging in. We remember the length of the marker */
   236         if (nsCRT::strncasecmp(path,"/%2F",4) == 0)
   237             special_ftp_len = 4;
   238         else if (nsCRT::strncmp(path,"//",2) == 0 )
   239             special_ftp_len = 2; 
   240     }
   242     /* find the last slash before # or ? */
           // First advance to the end of the path part ...
   243     for(; (*fwdPtr != '\0') && 
   244             (*fwdPtr != '?') && 
   245             (*fwdPtr != '#'); ++fwdPtr)
   246     {
   247     }
   249     /* found nothing, but go back one only */
   250     /* if there is something to go back to */
   251     if (fwdPtr != path && *fwdPtr == '\0')
   252     {
   253         --fwdPtr;
   254     }
   256     /* search the slash */
           // ... then walk backwards until a '/' or the start of the buffer.
   257     for(; (fwdPtr != path) && 
   258             (*fwdPtr != '/'); --fwdPtr)
   259     {
   260     }
   261     lastslash = fwdPtr;
   262     fwdPtr = path;
   264     /* replace all %2E or %2e with . in the path */
   265     /* but stop at lastchar if non null */
           // The loop ends at the first of: NUL, '?', '#', or the position
           // recorded in lastslash (unless lastslash points at a NUL).
   266     for(; (*fwdPtr != '\0') && 
   267             (*fwdPtr != '?') && 
   268             (*fwdPtr != '#') &&
   269             (*lastslash == '\0' || fwdPtr != lastslash); ++fwdPtr)
   270     {
   271         if (*fwdPtr == '%' && *(fwdPtr+1) == '2' && 
   272             (*(fwdPtr+2) == 'E' || *(fwdPtr+2) == 'e'))
   273         {
                   // Three input bytes become one output byte; the loop's own
                   // ++fwdPtr skips the third.
   274             *urlPtr++ = '.';
   275             ++fwdPtr;
   276             ++fwdPtr;
   277         } 
   278         else 
   279         {
   280             *urlPtr++ = *fwdPtr;
   281         }
   282     }
   283     // Copy remaining stuff past the #?;
   284     for (; *fwdPtr != '\0'; ++fwdPtr)
   285     {
   286         *urlPtr++ = *fwdPtr;
   287     }
   288     *urlPtr = '\0';  // terminate the url 
   290     // start again, this time for real 
   291     fwdPtr = path;
   292     urlPtr = path;
   294     for(; (*fwdPtr != '\0') && 
   295             (*fwdPtr != '?') && 
   296             (*fwdPtr != '#'); ++fwdPtr)
   297     {
   298         if (*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '/' )
   299         {
   300             // remove . followed by slash
                   // Nothing is written; together with the loop increment this
                   // skips "/." so the following '/' is handled next.
   301             ++fwdPtr;
   302         }
   303         else if(*fwdPtr == '/' && *(fwdPtr+1) == '.' && *(fwdPtr+2) == '.' && 
   304                 (*(fwdPtr+3) == '/' || 
   305                     *(fwdPtr+3) == '\0' || // This will take care of 
   306                     *(fwdPtr+3) == '?' ||  // something like foo/bar/..#sometag
   307                     *(fwdPtr+3) == '#'))
   308         {
   309             // remove foo/.. 
   310             // reverse the urlPtr to the previous slash if possible
   311             // if url does not allow relative root then drop .. above root 
   312             // otherwise retain them in the path 
   313             if(traversal > 0 || !(flags & 
   314                                   NET_COALESCE_ALLOW_RELATIVE_ROOT))
   315             { 
   316                 if (urlPtr != path)
   317                     urlPtr--; // we must be going back at least by one 
   318                 for(;*urlPtr != '/' && urlPtr != path; urlPtr--)
   319                     ;  // null body 
                       // NOTE(review): traversal is unsigned, so this wraps when
                       // it is already 0; that can only happen on the branch
                       // taken because ALLOW_RELATIVE_ROOT is not set.
   320                 --traversal; // count back
   321                 // forward the fwdPtr past the ../
   322                 fwdPtr += 2;
   323                 // if we have reached the beginning of the path
   324                 // while searching for the previous / and we remember
   325                 // that it is an url that begins with /%2F then
   326                 // advance urlPtr again by 3 chars because /%2F already 
   327                 // marks the root of the path
   328                 if (urlPtr == path && special_ftp_len > 3) 
   329                 {
   330                     ++urlPtr;
   331                     ++urlPtr;
   332                     ++urlPtr;
   333                 }
   334                 // special case if we have reached the end 
   335                 // to preserve the last /
   336                 if (*fwdPtr == '.' && *(fwdPtr+1) == '\0')
   337                     ++urlPtr;
   338             } 
   339             else 
   340             {
   341                 // there are too many /.. in this path, just copy them instead.
   342                 // forward the urlPtr past the /.. and copying it
   344                 // However if we remember it is an url that starts with
   345                 // /%2F and urlPtr just points at the "F" of "/%2F" then do 
   346                 // not overwrite it with the /, just copy .. and move forward
   347                 // urlPtr. 
   348                 if (special_ftp_len > 3 && urlPtr == path+special_ftp_len-1)
   349                     ++urlPtr;
   350                 else 
   351                     *urlPtr++ = *fwdPtr;
   352                 ++fwdPtr;
   353                 *urlPtr++ = *fwdPtr;
   354                 ++fwdPtr;
   355                 *urlPtr++ = *fwdPtr;
   356             }
   357         }
   358         else
   359         {
   360             // count the hierarchy, but only if we do not have reached
   361             // the root of some special urls with a special root marker 
   362             if (*fwdPtr == '/' &&  *(fwdPtr+1) != '.' &&
   363                (special_ftp_len != 2 || *(fwdPtr+1) != '/'))
   364                 traversal++;
   365             // copy the url incrementally 
   366             *urlPtr++ = *fwdPtr;
   367         }
   368     }
   370     /* 
   371      *  Now lets remove trailing . case
   372      *     /foo/foo1/.   ->  /foo/foo1/
   373      */
   375     if ((urlPtr > (path+1)) && (*(urlPtr-1) == '.') && (*(urlPtr-2) == '/'))
   376         urlPtr--;
   378     // Copy remaining stuff past the #?;
   379     for (; *fwdPtr != '\0'; ++fwdPtr)
   380     {
   381         *urlPtr++ = *fwdPtr;
   382     }
   383     *urlPtr = '\0';  // terminate the url 
   384 }
   386 nsresult
   387 net_ResolveRelativePath(const nsACString &relativePath,
   388                         const nsACString &basePath,
   389                         nsACString &result)
   390 {
   391     nsAutoCString name;
   392     nsAutoCString path(basePath);
   393     bool needsDelim = false;
   395     if ( !path.IsEmpty() ) {
   396         char16_t last = path.Last();
   397         needsDelim = !(last == '/');
   398     }
   400     nsACString::const_iterator beg, end;
   401     relativePath.BeginReading(beg);
   402     relativePath.EndReading(end);
   404     bool stop = false;
   405     char c;
   406     for (; !stop; ++beg) {
   407         c = (beg == end) ? '\0' : *beg;
   408         //printf("%c [name=%s] [path=%s]\n", c, name.get(), path.get());
   409         switch (c) {
   410           case '\0':
   411           case '#':
   412           case '?':
   413             stop = true;
   414             // fall through...
   415           case '/':
   416             // delimiter found
   417             if (name.EqualsLiteral("..")) {
   418                 // pop path
   419                 // If we already have the delim at end, then
   420                 //  skip over that when searching for next one to the left
   421                 int32_t offset = path.Length() - (needsDelim ? 1 : 2);
   422                 // First check for errors
   423                 if (offset < 0 ) 
   424                     return NS_ERROR_MALFORMED_URI;
   425                 int32_t pos = path.RFind("/", false, offset);
   426                 if (pos >= 0)
   427                     path.Truncate(pos + 1);
   428                 else
   429                     path.Truncate();
   430             }
   431             else if (name.IsEmpty() || name.EqualsLiteral(".")) {
   432                 // do nothing
   433             }
   434             else {
   435                 // append name to path
   436                 if (needsDelim)
   437                     path += '/';
   438                 path += name;
   439                 needsDelim = true;
   440             }
   441             name.Truncate();
   442             break;
   444           default:
   445             // append char to name
   446             name += c;
   447         }
   448     }
   449     // append anything left on relativePath (e.g. #..., ;..., ?...)
   450     if (c != '\0')
   451         path += Substring(--beg, end);
   453     result = path;
   454     return NS_OK;
   455 }
   457 //----------------------------------------------------------------------------
   458 // scheme fu
   459 //----------------------------------------------------------------------------
   461 /* Extract URI-Scheme if possible */
   462 nsresult
   463 net_ExtractURLScheme(const nsACString &inURI,
   464                      uint32_t *startPos, 
   465                      uint32_t *endPos,
   466                      nsACString *scheme)
   467 {
   468     // search for something up to a colon, and call it the scheme
   469     const nsPromiseFlatCString &flatURI = PromiseFlatCString(inURI);
   470     const char* uri_start = flatURI.get();
   471     const char* uri = uri_start;
   473     if (!uri)
   474         return NS_ERROR_MALFORMED_URI;
   476     // skip leading white space
   477     while (nsCRT::IsAsciiSpace(*uri))
   478         uri++;
   480     uint32_t start = uri - uri_start;
   481     if (startPos) {
   482         *startPos = start;
   483     }
   485     uint32_t length = 0;
   486     char c;
   487     while ((c = *uri++) != '\0') {
   488         // First char must be Alpha
   489         if (length == 0 && nsCRT::IsAsciiAlpha(c)) {
   490             length++;
   491         } 
   492         // Next chars can be alpha + digit + some special chars
   493         else if (length > 0 && (nsCRT::IsAsciiAlpha(c) || 
   494                  nsCRT::IsAsciiDigit(c) || c == '+' || 
   495                  c == '.' || c == '-')) {
   496             length++;
   497         }
   498         // stop if colon reached but not as first char
   499         else if (c == ':' && length > 0) {
   500             if (endPos) {
   501                 *endPos = start + length;
   502             }
   504             if (scheme)
   505                 scheme->Assign(Substring(inURI, start, length));
   506             return NS_OK;
   507         }
   508         else 
   509             break;
   510     }
   511     return NS_ERROR_MALFORMED_URI;
   512 }
   514 bool
   515 net_IsValidScheme(const char *scheme, uint32_t schemeLen)
   516 {
   517     // first char must be alpha
   518     if (!nsCRT::IsAsciiAlpha(*scheme))
   519         return false;
   521     // nsCStrings may have embedded nulls -- reject those too
   522     for (; schemeLen; ++scheme, --schemeLen) {
   523         if (!(nsCRT::IsAsciiAlpha(*scheme) ||
   524               nsCRT::IsAsciiDigit(*scheme) ||
   525               *scheme == '+' ||
   526               *scheme == '.' ||
   527               *scheme == '-'))
   528             return false;
   529     }
   531     return true;
   532 }
   // Produces a filtered copy of |str| in |result|:
   //   - leading spaces, tabs, CRs and LFs are dropped,
   //   - tabs, CRs and LFs after the scheme are dropped (those that appear
   //     before a ':' are kept, unless a '/' or '@' is seen first or the
   //     string contains no ':' at all, in which case they are dropped too),
   //   - trailing spaces are dropped.
   //
   // Returns true when something was filtered, in which case |result| holds
   // the filtered string.  Returns false when nothing had to change; |result|
   // is then left empty.
   534 bool
   535 net_FilterURIString(const char *str, nsACString& result)
   536 {
   537     NS_PRECONDITION(str, "Must have a non-null string!");
           // Becomes true as soon as any character is dropped.
   538     bool writing = false;
   539     result.Truncate();
           // |p| scans the input; |str| is moved along to mark the start of the
           // run that has not yet been appended to |result|.
   540     const char *p = str;
   542     // Remove leading spaces, tabs, CR, LF if any.
   543     while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') {
   544         writing = true;
   545         str = p + 1;
   546         p++;
   547     }
   549     // Don't strip from the scheme, because other code assumes everything
   550     // up to the ':' is the scheme, and it's bad not to have it match.
   551     // If there's no ':', strip.
   552     bool found_colon = false;
           // Position of the first tab/CR/LF seen before stripping was enabled.
   553     const char *first = nullptr;
   554     while (*p) {
   555         switch (*p) {
   556             case '\t': 
   557             case '\r': 
   558             case '\n':
   559                 if (found_colon) {
   560                     writing = true;
   561                     // append chars up to but not including *p
   562                     if (p > str)
   563                         result.Append(str, p - str);
   564                     str = p + 1;
   565                 } else {
   566                     // remember where the first \t\r\n was in case we find no scheme
   567                     if (!first)
   568                         first = p;
   569                 }
   570                 break;
   572             case ':':
   573                 found_colon = true;
   574                 break;
   576             case '/':
   577             case '@':
   578                 if (!found_colon) {
   579                     // colon also has to precede / or @ to be a scheme
   580                     found_colon = true; // not really, but means ok to strip
   581                     if (first) {
   582                         // go back and replace
   583                         p = first;
   584                         continue; // process *p again
   585                     }
   586                 }
   587                 break;
   589             default:
   590                 break;
   591         }
   592         p++;
   594         // At end, if there was no scheme, and we hit a control char, fix
   595         // it up now.
   596         if (!*p && first != nullptr && !found_colon) {
   597             // TRICKY - to avoid duplicating code, we reset the loop back
   598             // to the point we found something to do
   599             p = first;
   600             // This also stops us from looping after we finish
   601             found_colon = true; // so we'll replace \t\r\n
   602         }
   603     }
   605     // Remove trailing spaces if any
   606     while (((p-1) >= str) && (*(p-1) == ' ')) {
   607         writing = true;
   608         p--;
   609     }
           // Flush the final pending run, but only if something was dropped;
           // otherwise |result| stays empty.
   611     if (writing && p > str)
   612         result.Append(str, p - str);
   614     return writing;
   615 }
   #if defined(XP_WIN)
   // Appends a copy of |aURL| with every backslash turned into a forward
   // slash to |aResultBuf|, but only if |aURL| contains at least one
   // backslash.  Returns true in that case; returns false and leaves
   // |aResultBuf| untouched otherwise.
   bool
   net_NormalizeFileURL(const nsACString &aURL, nsCString &aResultBuf)
   {
       nsACString::const_iterator first, last;
       aURL.BeginReading(first);
       aURL.EndReading(last);

       const char *pending = first.get();  // start of the run not yet copied
       const char *cur = pending;
       bool changed = false;

       while (cur != last.get()) {
           if (*cur == '\\') {
               changed = true;
               // flush what precedes the backslash, then emit the slash
               if (cur > pending) {
                   aResultBuf.Append(pending, cur - pending);
               }
               aResultBuf += '/';
               pending = cur + 1;
           }
           ++cur;
       }

       // copy the tail that follows the last backslash
       if (changed && cur > pending) {
           aResultBuf.Append(pending, cur - pending);
       }

       return changed;
   }
   #endif
   647 //----------------------------------------------------------------------------
   648 // miscellaneous (i.e., stuff that should really be elsewhere)
   649 //----------------------------------------------------------------------------
   // Lower-cases |c| in place when it is an ASCII upper-case letter; every
   // other value is left alone.
   static inline
   void ToLower(char &c)
   {
       if (c >= 'A' && c <= 'Z')
           c = static_cast<char>(c + ('a' - 'A'));
   }

   // Lower-cases the ASCII letters among the first |length| bytes of |str|.
   void
   net_ToLowerCase(char *str, uint32_t length)
   {
       for (uint32_t i = 0; i < length; ++i)
           ToLower(str[i]);
   }

   // Lower-cases the ASCII letters of the NUL-terminated string |str|.
   void
   net_ToLowerCase(char *str)
   {
       char *cur = str;
       while (*cur != '\0') {
           ToLower(*cur);
           ++cur;
       }
   }
   // Scans [iter, stop) for the first character that occurs in the
   // NUL-terminated |set|.  The scan also ends at a NUL byte in the input.
   // Returns a pointer to the match, or to the position where the scan ended
   // (|stop| or the NUL) when there is no match.
   char *
   net_FindCharInSet(const char *iter, const char *stop, const char *set)
   {
       const char *cur = iter;
       while (cur != stop && *cur != '\0') {
           // walk the set until it is exhausted or it matches *cur
           const char *member = set;
           while (*member != '\0' && *member != *cur)
               ++member;
           if (*member != '\0')
               return (char *) cur;
           ++cur;
       }
       return (char *) cur;
   }
   // Scans [iter, stop) for the first character that does NOT occur in the
   // NUL-terminated |set|.  Returns a pointer to that character, or |stop|
   // when every character in the range is in the set.
   //
   // The range may be empty (iter == stop): |stop| is then returned without
   // reading from it, so the scan can never run past the end of the range.
   char *
   net_FindCharNotInSet(const char *iter, const char *stop, const char *set)
   {
       while (iter != stop) {
           bool inSet = false;
           for (const char *s = set; *s; ++s) {
               if (*iter == *s) {
                   inSet = true;
                   break;
               }
           }
           if (!inSet)
               break;
           ++iter;
       }
       return (char *) iter;
   }
   // Scans the range [stop, iter) backwards, starting at the character just
   // before |iter|, for the first character that does NOT occur in the
   // NUL-terminated |set|.  Returns a pointer to that character, or |stop| - 1
   // when the range is empty or every character in it is in the set.
   char *
   net_RFindCharNotInSet(const char *stop, const char *iter, const char *set)
   {
       const char *cur = iter - 1;
       const char *limit = stop - 1;

       while (cur != limit) {
           bool inSet = false;
           for (const char *s = set; *s; ++s) {
               if (*cur == *s) {
                   inSet = true;
                   break;
               }
           }
           if (!inSet)
               break;
           --cur;
       }
       return (char *) cur;
   }
   718 #define HTTP_LWS " \t"
   720 // Return the index of the closing quote of the string, if any
   721 static uint32_t
   722 net_FindStringEnd(const nsCString& flatStr,
   723                   uint32_t stringStart,
   724                   char stringDelim)
   725 {
   726     NS_ASSERTION(stringStart < flatStr.Length() &&
   727                  flatStr.CharAt(stringStart) == stringDelim &&
   728                  (stringDelim == '"' || stringDelim == '\''),
   729                  "Invalid stringStart");
   731     const char set[] = { stringDelim, '\\', '\0' };
   732     do {
   733         // stringStart points to either the start quote or the last
   734         // escaped char (the char following a '\\')
   736         // Write to searchStart here, so that when we get back to the
   737         // top of the loop right outside this one we search from the
   738         // right place.
   739         uint32_t stringEnd = flatStr.FindCharInSet(set, stringStart + 1);
   740         if (stringEnd == uint32_t(kNotFound))
   741             return flatStr.Length();
   743         if (flatStr.CharAt(stringEnd) == '\\') {
   744             // Hit a backslash-escaped char.  Need to skip over it.
   745             stringStart = stringEnd + 1;
   746             if (stringStart == flatStr.Length())
   747                 return stringStart;
   749             // Go back to looking for the next escape or the string end
   750             continue;
   751         }
   753         return stringEnd;
   755     } while (true);
   757     NS_NOTREACHED("How did we get here?");
   758     return flatStr.Length();
   759 }
   762 static uint32_t
   763 net_FindMediaDelimiter(const nsCString& flatStr,
   764                        uint32_t searchStart,
   765                        char delimiter)
   766 {
   767     do {
   768         // searchStart points to the spot from which we should start looking
   769         // for the delimiter.
   770         const char delimStr[] = { delimiter, '"', '\0' };
   771         uint32_t curDelimPos = flatStr.FindCharInSet(delimStr, searchStart);
   772         if (curDelimPos == uint32_t(kNotFound))
   773             return flatStr.Length();
   775         char ch = flatStr.CharAt(curDelimPos);
   776         if (ch == delimiter) {
   777             // Found delimiter
   778             return curDelimPos;
   779         }
   781         // We hit the start of a quoted string.  Look for its end.
   782         searchStart = net_FindStringEnd(flatStr, curDelimPos, ch);
   783         if (searchStart == flatStr.Length())
   784             return searchStart;
   786         ++searchStart;
   788         // searchStart now points to the first char after the end of the
   789         // string, so just go back to the top of the loop and look for
   790         // |delimiter| again.
   791     } while (true);
   793     NS_NOTREACHED("How did we get here?");
   794     return flatStr.Length();
   795 }
   797 // aOffset should be added to aCharsetStart and aCharsetEnd if this
   798 // function sets them.
   799 static void
   800 net_ParseMediaType(const nsACString &aMediaTypeStr,
   801                    nsACString       &aContentType,
   802                    nsACString       &aContentCharset,
   803                    int32_t          aOffset,
   804                    bool             *aHadCharset,
   805                    int32_t          *aCharsetStart,
   806                    int32_t          *aCharsetEnd)
   807 {
   808     const nsCString& flatStr = PromiseFlatCString(aMediaTypeStr);
   809     const char* start = flatStr.get();
   810     const char* end = start + flatStr.Length();
   812     // Trim LWS leading and trailing whitespace from type.  We include '(' in
   813     // the trailing trim set to catch media-type comments, which are not at all
   814     // standard, but may occur in rare cases.
   815     const char* type = net_FindCharNotInSet(start, end, HTTP_LWS);
   816     const char* typeEnd = net_FindCharInSet(type, end, HTTP_LWS ";(");
   818     const char* charset = "";
   819     const char* charsetEnd = charset;
   820     int32_t charsetParamStart = 0;
   821     int32_t charsetParamEnd = 0;
   823     // Iterate over parameters
   824     bool typeHasCharset = false;
   825     uint32_t paramStart = flatStr.FindChar(';', typeEnd - start);
   826     if (paramStart != uint32_t(kNotFound)) {
   827         // We have parameters.  Iterate over them.
   828         uint32_t curParamStart = paramStart + 1;
   829         do {
   830             uint32_t curParamEnd =
   831                 net_FindMediaDelimiter(flatStr, curParamStart, ';');
   833             const char* paramName = net_FindCharNotInSet(start + curParamStart,
   834                                                          start + curParamEnd,
   835                                                          HTTP_LWS);
   836             static const char charsetStr[] = "charset=";
   837             if (PL_strncasecmp(paramName, charsetStr,
   838                                sizeof(charsetStr) - 1) == 0) {
   839                 charset = paramName + sizeof(charsetStr) - 1;
   840                 charsetEnd = start + curParamEnd;
   841                 typeHasCharset = true;
   842                 charsetParamStart = curParamStart - 1;
   843                 charsetParamEnd = curParamEnd;
   844             }
   846             curParamStart = curParamEnd + 1;
   847         } while (curParamStart < flatStr.Length());
   848     }
   850     bool charsetNeedsQuotedStringUnescaping = false;
   851     if (typeHasCharset) {
   852         // Trim LWS leading and trailing whitespace from charset.  We include
   853         // '(' in the trailing trim set to catch media-type comments, which are
   854         // not at all standard, but may occur in rare cases.
   855         charset = net_FindCharNotInSet(charset, charsetEnd, HTTP_LWS);
   856         if (*charset == '"') {
   857             charsetNeedsQuotedStringUnescaping = true;
   858             charsetEnd =
   859                 start + net_FindStringEnd(flatStr, charset - start, *charset);
   860             charset++;
   861             NS_ASSERTION(charsetEnd >= charset, "Bad charset parsing");
   862         } else {
   863             charsetEnd = net_FindCharInSet(charset, charsetEnd, HTTP_LWS ";(");
   864         }
   865     }
   867     // if the server sent "*/*", it is meaningless, so do not store it.
   868     // also, if type is the same as aContentType, then just update the
   869     // charset.  however, if charset is empty and aContentType hasn't
   870     // changed, then don't wipe-out an existing aContentCharset.  We
   871     // also want to reject a mime-type if it does not include a slash.
   872     // some servers give junk after the charset parameter, which may
   873     // include a comma, so this check makes us a bit more tolerant.
   875     if (type != typeEnd && strncmp(type, "*/*", typeEnd - type) != 0 &&
   876         memchr(type, '/', typeEnd - type) != nullptr) {
   877         // Common case here is that aContentType is empty
   878         bool eq = !aContentType.IsEmpty() &&
   879             aContentType.Equals(Substring(type, typeEnd),
   880                                 nsCaseInsensitiveCStringComparator());
   881         if (!eq) {
   882             aContentType.Assign(type, typeEnd - type);
   883             ToLowerCase(aContentType);
   884         }
   886         if ((!eq && *aHadCharset) || typeHasCharset) {
   887             *aHadCharset = true;
   888             if (charsetNeedsQuotedStringUnescaping) {
   889                 // parameters using the "quoted-string" syntax need
   890                 // backslash-escapes to be unescaped (see RFC 2616 Section 2.2)
   891                 aContentCharset.Truncate();
   892                 for (const char *c = charset; c != charsetEnd; c++) {
   893                     if (*c == '\\' && c + 1 != charsetEnd) {
   894                         // eat escape
   895                         c++;  
   896                     }
   897                     aContentCharset.Append(*c);
   898                 }
   899             }
   900             else {
   901                 aContentCharset.Assign(charset, charsetEnd - charset);
   902             }
   903             if (typeHasCharset) {
   904                 *aCharsetStart = charsetParamStart + aOffset;
   905                 *aCharsetEnd = charsetParamEnd + aOffset;
   906             }
   907         }
   908         // Only set a new charset position if this is a different type
   909         // from the last one we had and it doesn't already have a
   910         // charset param.  If this is the same type, we probably want
   911         // to leave the charset position on its first occurrence.
   912         if (!eq && !typeHasCharset) {
   913             int32_t charsetStart = int32_t(paramStart);
   914             if (charsetStart == kNotFound)
   915                 charsetStart =  flatStr.Length();
   917             *aCharsetEnd = *aCharsetStart = charsetStart + aOffset;
   918         }
   919     }
   920 }
   922 #undef HTTP_LWS
   924 void
   925 net_ParseContentType(const nsACString &aHeaderStr,
   926                      nsACString       &aContentType,
   927                      nsACString       &aContentCharset,
   928                      bool             *aHadCharset)
   929 {
   930     int32_t dummy1, dummy2;
   931     net_ParseContentType(aHeaderStr, aContentType, aContentCharset,
   932                          aHadCharset, &dummy1, &dummy2);
   933 }
   935 void
   936 net_ParseContentType(const nsACString &aHeaderStr,
   937                      nsACString       &aContentType,
   938                      nsACString       &aContentCharset,
   939                      bool             *aHadCharset,
   940                      int32_t          *aCharsetStart,
   941                      int32_t          *aCharsetEnd)
   942 {
   943     //
   944     // Augmented BNF (from RFC 2616 section 3.7):
   945     //
   946     //   header-value = media-type *( LWS "," LWS media-type )
   947     //   media-type   = type "/" subtype *( LWS ";" LWS parameter )
   948     //   type         = token
   949     //   subtype      = token
   950     //   parameter    = attribute "=" value
   951     //   attribute    = token
   952     //   value        = token | quoted-string
   953     //   
   954     //
   955     // Examples:
   956     //
   957     //   text/html
   958     //   text/html, text/html
   959     //   text/html,text/html; charset=ISO-8859-1
   960     //   text/html,text/html; charset="ISO-8859-1"
   961     //   text/html;charset=ISO-8859-1, text/html
   962     //   text/html;charset='ISO-8859-1', text/html
   963     //   application/octet-stream
   964     //
   966     *aHadCharset = false;
   967     const nsCString& flatStr = PromiseFlatCString(aHeaderStr);
   969     // iterate over media-types.  Note that ',' characters can happen
   970     // inside quoted strings, so we need to watch out for that.
   971     uint32_t curTypeStart = 0;
   972     do {
   973         // curTypeStart points to the start of the current media-type.  We want
   974         // to look for its end.
   975         uint32_t curTypeEnd =
   976             net_FindMediaDelimiter(flatStr, curTypeStart, ',');
   978         // At this point curTypeEnd points to the spot where the media-type
   979         // starting at curTypeEnd ends.  Time to parse that!
   980         net_ParseMediaType(Substring(flatStr, curTypeStart,
   981                                      curTypeEnd - curTypeStart),
   982                            aContentType, aContentCharset, curTypeStart,
   983                            aHadCharset, aCharsetStart, aCharsetEnd);
   985         // And let's move on to the next media-type
   986         curTypeStart = curTypeEnd + 1;
   987     } while (curTypeStart < flatStr.Length());
   988 }
   990 bool
   991 net_IsValidHostName(const nsCSubstring &host)
   992 {
   993     const char *end = host.EndReading();
   994     // Use explicit whitelists to select which characters we are
   995     // willing to send to lower-level DNS logic. This is more
   996     // self-documenting, and can also be slightly faster than the
   997     // blacklist approach, since DNS names are the common case, and
   998     // the commonest characters will tend to be near the start of
   999     // the list.
  1001     // Whitelist for DNS names (RFC 1035) with extra characters added 
  1002     // for pragmatic reasons "$+_"
  1003     // see https://bugzilla.mozilla.org/show_bug.cgi?id=355181#c2
  1004     if (net_FindCharNotInSet(host.BeginReading(), end,
  1005                              "abcdefghijklmnopqrstuvwxyz"
  1006                              ".-0123456789"
  1007                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ$+_") == end)
  1008         return true;
  1010     // Might be a valid IPv6 link-local address containing a percent sign
  1011     nsAutoCString strhost(host);
  1012     PRNetAddr addr;
  1013     return PR_StringToNetAddr(strhost.get(), &addr) == PR_SUCCESS;
  1016 bool
  1017 net_IsValidIPv4Addr(const char *addr, int32_t addrLen)
  1019     RangedPtr<const char> p(addr, addrLen);
  1021     int32_t octet = -1;   // means no digit yet
  1022     int32_t dotCount = 0; // number of dots in the address
  1024     for (; addrLen; ++p, --addrLen) {
  1025         if (*p == '.') {
  1026             dotCount++;
  1027             if (octet == -1) {
  1028                 // invalid octet
  1029                 return false;
  1031             octet = -1;
  1032         } else if (*p >= '0' && *p <='9') {
  1033             if (octet == 0) {
  1034                 // leading 0 is not allowed
  1035                 return false;
  1036             } else if (octet == -1) {
  1037                 octet = *p - '0';
  1038             } else {
  1039                 octet *= 10;
  1040                 octet += *p - '0';
  1041                 if (octet > 255)
  1042                     return false;
  1044         } else {
  1045             // invalid character
  1046             return false;
  1050     return (dotCount == 3 && octet != -1);
  1053 bool
  1054 net_IsValidIPv6Addr(const char *addr, int32_t addrLen)
  1056     RangedPtr<const char> p(addr, addrLen);
  1058     int32_t digits = 0; // number of digits in current block
  1059     int32_t colons = 0; // number of colons in a row during parsing
  1060     int32_t blocks = 0; // number of hexadecimal blocks
  1061     bool haveZeros = false; // true if double colon is present in the address
  1063     for (; addrLen; ++p, --addrLen) {
  1064         if (*p == ':') {
  1065             if (colons == 0) {
  1066                 if (digits != 0) {
  1067                     digits = 0;
  1068                     blocks++;
  1070             } else if (colons == 1) {
  1071                 if (haveZeros)
  1072                     return false; // only one occurrence is allowed
  1073                 haveZeros = true;
  1074             } else {
  1075                 // too many colons in a row
  1076                 return false;
  1078             colons++;
  1079         } else if ((*p >= '0' && *p <= '9') || (*p >= 'a' && *p <= 'f') ||
  1080                    (*p >= 'A' && *p <= 'F')) {
  1081             if (colons == 1 && blocks == 0) // starts with a single colon
  1082                 return false;
  1083             if (digits == 4) // too many digits
  1084                 return false;
  1085             colons = 0;
  1086             digits++;
  1087         } else if (*p == '.') {
  1088             // check valid IPv4 from the beginning of the last block
  1089             if (!net_IsValidIPv4Addr(p.get() - digits, addrLen + digits))
  1090                 return false;
  1091             return (haveZeros && blocks < 6) || (!haveZeros && blocks == 6);
  1092         } else {
  1093             // invalid character
  1094             return false;
  1098     if (colons == 1) // ends with a single colon
  1099         return false;
  1101     if (digits) // there is a block at the end
  1102         blocks++;
  1104     return (haveZeros && blocks < 8) || (!haveZeros && blocks == 8);

mercurial