The Tor Browser: toolkit/components/url-classifier/nsUrlClassifierUtils.cpp@925c144e1f1f

     1 /* This Source Code Form is subject to the terms of the Mozilla Public

     2  * License, v. 2.0. If a copy of the MPL was not distributed with this

     3  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     5 #include "nsEscape.h"

     6 #include "nsString.h"

     7 #include "nsIURI.h"

     8 #include "nsNetUtil.h"

     9 #include "nsUrlClassifierUtils.h"

    10 #include "nsTArray.h"

    11 #include "nsReadableUtils.h"

    12 #include "plbase64.h"

    13 #include "prprf.h"

    15 static char int_to_hex_digit(int32_t i)

    16 {

    17   NS_ASSERTION((i >= 0) && (i <= 15), "int too big in int_to_hex_digit");

    18   return static_cast<char>(((i < 10) ? (i + '0') : ((i - 10) + 'A')));

    19 }

    21 static bool

    22 IsDecimal(const nsACString & num)

    23 {

    24   for (uint32_t i = 0; i < num.Length(); i++) {

    25     if (!isdigit(num[i])) {

    26       return false;

    27     }

    28   }

    30   return true;

    31 }

    33 static bool

    34 IsHex(const nsACString & num)

    35 {

    36   if (num.Length() < 3) {

    37     return false;

    38   }

    40   if (num[0] != '0' || !(num[1] == 'x' || num[1] == 'X')) {

    41     return false;

    42   }

    44   for (uint32_t i = 2; i < num.Length(); i++) {

    45     if (!isxdigit(num[i])) {

    46       return false;

    47     }

    48   }

    50   return true;

    51 }

    53 static bool

    54 IsOctal(const nsACString & num)

    55 {

    56   if (num.Length() < 2) {

    57     return false;

    58   }

    60   if (num[0] != '0') {

    61     return false;

    62   }

    64   for (uint32_t i = 1; i < num.Length(); i++) {

    65     if (!isdigit(num[i]) || num[i] == '8' || num[i] == '9') {

    66       return false;

    67     }

    68   }

    70   return true;

    71 }

    73 nsUrlClassifierUtils::nsUrlClassifierUtils() : mEscapeCharmap(nullptr)

    74 {

    75 }

    77 nsresult

    78 nsUrlClassifierUtils::Init()

    79 {

    80   // Everything but alpha numerics, - and .

    81   mEscapeCharmap = new Charmap(0xffffffff, 0xfc009fff, 0xf8000001, 0xf8000001,

    82                                0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff);

    83   if (!mEscapeCharmap)

    84     return NS_ERROR_OUT_OF_MEMORY;

    85   return NS_OK;

    86 }

     // nsISupports implementation macro for nsUrlClassifierUtils; nsIUrlClassifierUtils is the interface it lists.
     88 NS_IMPL_ISUPPORTS(nsUrlClassifierUtils, nsIUrlClassifierUtils)

    90 /////////////////////////////////////////////////////////////////////////////

    91 // nsIUrlClassifierUtils

    93 NS_IMETHODIMP

    94 nsUrlClassifierUtils::GetKeyForURI(nsIURI * uri, nsACString & _retval)

    95 {

    96   nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(uri);

    97   if (!innerURI)

    98     innerURI = uri;

   100   nsAutoCString host;

   101   innerURI->GetAsciiHost(host);

   103   if (host.IsEmpty()) {

   104     return NS_ERROR_MALFORMED_URI;

   105   }

   107   nsresult rv = CanonicalizeHostname(host, _retval);

   108   NS_ENSURE_SUCCESS(rv, rv);

   110   nsAutoCString path;

   111   rv = innerURI->GetPath(path);

   112   NS_ENSURE_SUCCESS(rv, rv);

   114   // strip out anchors

   115   int32_t ref = path.FindChar('#');

   116   if (ref != kNotFound)

   117     path.SetLength(ref);

   119   nsAutoCString temp;

   120   rv = CanonicalizePath(path, temp);

   121   NS_ENSURE_SUCCESS(rv, rv);

   123   _retval.Append(temp);

   125   return NS_OK;

   126 }

   128 /////////////////////////////////////////////////////////////////////////////

   129 // non-interface methods

   131 nsresult

   132 nsUrlClassifierUtils::CanonicalizeHostname(const nsACString & hostname,

   133                                            nsACString & _retval)

   134 {

   135   nsAutoCString unescaped;

   136   if (!NS_UnescapeURL(PromiseFlatCString(hostname).get(),

   137                       PromiseFlatCString(hostname).Length(),

   138                       0, unescaped)) {

   139     unescaped.Assign(hostname);

   140   }

   142   nsAutoCString cleaned;

   143   CleanupHostname(unescaped, cleaned);

   145   nsAutoCString temp;

   146   ParseIPAddress(cleaned, temp);

   147   if (!temp.IsEmpty()) {

   148     cleaned.Assign(temp);

   149   }

   151   ToLowerCase(cleaned);

   152   SpecialEncode(cleaned, false, _retval);

   154   return NS_OK;

   155 }

   158 nsresult

   159 nsUrlClassifierUtils::CanonicalizePath(const nsACString & path,

   160                                        nsACString & _retval)

   161 {

   162   _retval.Truncate();

   164   nsAutoCString decodedPath(path);

   165   nsAutoCString temp;

   166   while (NS_UnescapeURL(decodedPath.get(), decodedPath.Length(), 0, temp)) {

   167     decodedPath.Assign(temp);

   168     temp.Truncate();

   169   }

   171   SpecialEncode(decodedPath, true, _retval);

   172   // XXX: lowercase the path?

   174   return NS_OK;

   175 }

   177 void

   178 nsUrlClassifierUtils::CleanupHostname(const nsACString & hostname,

   179                                       nsACString & _retval)

   180 {

   181   _retval.Truncate();

   183   const char* curChar = hostname.BeginReading();

   184   const char* end = hostname.EndReading();

   185   char lastChar = '\0';

   186   while (curChar != end) {

   187     unsigned char c = static_cast<unsigned char>(*curChar);

   188     if (c == '.' && (lastChar == '\0' || lastChar == '.')) {

   189       // skip

   190     } else {

   191       _retval.Append(*curChar);

   192     }

   193     lastChar = c;

   194     ++curChar;

   195   }

   197   // cut off trailing dots

   198   while (_retval.Length() > 0 && _retval[_retval.Length() - 1] == '.') {

   199     _retval.SetLength(_retval.Length() - 1);

   200   }

   201 }

   203 void

   204 nsUrlClassifierUtils::ParseIPAddress(const nsACString & host,

   205                                      nsACString & _retval)

   206 {

   207   _retval.Truncate();

   208   nsACString::const_iterator iter, end;

   209   host.BeginReading(iter);

   210   host.EndReading(end);

   212   if (host.Length() <= 15) {

   213     // The Windows resolver allows a 4-part dotted decimal IP address to

   214     // have a space followed by any old rubbish, so long as the total length

   215     // of the string doesn't get above 15 characters. So, "10.192.95.89 xy"

   216     // is resolved to 10.192.95.89.

   217     // If the string length is greater than 15 characters, e.g.

   218     // "10.192.95.89 xy.wildcard.example.com", it will be resolved through

   219     // DNS.

   221     if (FindCharInReadable(' ', iter, end)) {

   222       end = iter;

   223     }

   224   }

   226   for (host.BeginReading(iter); iter != end; iter++) {

   227     if (!(isxdigit(*iter) || *iter == 'x' || *iter == 'X' || *iter == '.')) {

   228       // not an IP

   229       return;

   230     }

   231   }

   233   host.BeginReading(iter);

   234   nsTArray<nsCString> parts;

   235   ParseString(PromiseFlatCString(Substring(iter, end)), '.', parts);

   236   if (parts.Length() > 4) {

   237     return;

   238   }

   240   // If any potentially-octal numbers (start with 0 but not hex) have

   241   // non-octal digits, no part of the ip can be in octal

   242   // XXX: this came from the old javascript implementation, is it really

   243   // supposed to be like this?

   244   bool allowOctal = true;

   245   uint32_t i;

   247   for (i = 0; i < parts.Length(); i++) {

   248     const nsCString& part = parts[i];

   249     if (part[0] == '0') {

   250       for (uint32_t j = 1; j < part.Length(); j++) {

   251         if (part[j] == 'x') {

   252           break;

   253         }

   254         if (part[j] == '8' || part[j] == '9') {

   255           allowOctal = false;

   256           break;

   257         }

   258       }

   259     }

   260   }

   262   for (i = 0; i < parts.Length(); i++) {

   263     nsAutoCString canonical;

   265     if (i == parts.Length() - 1) {

   266       CanonicalNum(parts[i], 5 - parts.Length(), allowOctal, canonical);

   267     } else {

   268       CanonicalNum(parts[i], 1, allowOctal, canonical);

   269     }

   271     if (canonical.IsEmpty()) {

   272       _retval.Truncate();

   273       return;

   274     }

   276     if (_retval.IsEmpty()) {

   277       _retval.Assign(canonical);

   278     } else {

   279       _retval.Append('.');

   280       _retval.Append(canonical);

   281     }

   282   }

   283   return;

   284 }

   286 void

   287 nsUrlClassifierUtils::CanonicalNum(const nsACString& num,

   288                                    uint32_t bytes,

   289                                    bool allowOctal,

   290                                    nsACString& _retval)

   291 {

   292   _retval.Truncate();

   294   if (num.Length() < 1) {

   295     return;

   296   }

   298   uint32_t val;

   299   if (allowOctal && IsOctal(num)) {

   300     if (PR_sscanf(PromiseFlatCString(num).get(), "%o", &val) != 1) {

   301       return;

   302     }

   303   } else if (IsDecimal(num)) {

   304     if (PR_sscanf(PromiseFlatCString(num).get(), "%u", &val) != 1) {

   305       return;

   306     }

   307   } else if (IsHex(num)) {

   308   if (PR_sscanf(PromiseFlatCString(num).get(), num[1] == 'X' ? "0X%x" : "0x%x",

   309                 &val) != 1) {

   310       return;

   311     }

   312   } else {

   313     return;

   314   }

   316   while (bytes--) {

   317     char buf[20];

   318     PR_snprintf(buf, sizeof(buf), "%u", val & 0xff);

   319     if (_retval.IsEmpty()) {

   320       _retval.Assign(buf);

   321     } else {

   322       _retval = nsDependentCString(buf) + NS_LITERAL_CSTRING(".") + _retval;

   323     }

   324     val >>= 8;

   325   }

   326 }

   328 // This function will encode all "special" characters in typical url

   329 // encoding, that is %hh where h is a valid hex digit.  It will also fold

   330 // any duplicated slashes.

   331 bool

   332 nsUrlClassifierUtils::SpecialEncode(const nsACString & url,

   333                                     bool foldSlashes,

   334                                     nsACString & _retval)

   335 {

   336   bool changed = false;

   337   const char* curChar = url.BeginReading();

   338   const char* end = url.EndReading();

   340   unsigned char lastChar = '\0';

   341   while (curChar != end) {

   342     unsigned char c = static_cast<unsigned char>(*curChar);

   343     if (ShouldURLEscape(c)) {

   344       _retval.Append('%');

   345       _retval.Append(int_to_hex_digit(c / 16));

   346       _retval.Append(int_to_hex_digit(c % 16));

   348       changed = true;

   349     } else if (foldSlashes && (c == '/' && lastChar == '/')) {

   350       // skip

   351     } else {

   352       _retval.Append(*curChar);

   353     }

   354     lastChar = c;

   355     curChar++;

   356   }

   357   return changed;

   358 }

   360 bool

   361 nsUrlClassifierUtils::ShouldURLEscape(const unsigned char c) const

   362 {

   363   return c <= 32 || c == '%' || c >=127;

   364 }

The Tor Browser / file revision

toolkit/components/url-classifier/nsUrlClassifierUtils.cpp@925c144e1f1f

toolkit/components/url-classifier/nsUrlClassifierUtils.cpp