netwerk/dns/nsEffectiveTLDService.cpp

Wed, 31 Dec 2014 06:55:46 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:55:46 +0100
changeset 1
ca08bd8f51b2
permissions
-rw-r--r--

Added tag TORBROWSER_REPLICA for changeset 6474c204b198

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
     3 /* This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 // This service reads a file of rules describing TLD-like domain names.  For a
     8 // complete description of the expected file format and parsing rules, see
     9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
    11 #include "mozilla/ArrayUtils.h"
    12 #include "mozilla/MemoryReporting.h"
    14 #include "nsEffectiveTLDService.h"
    15 #include "nsIIDNService.h"
    16 #include "nsNetUtil.h"
    17 #include "prnetdb.h"
    19 using namespace mozilla;
    21 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
    22                   nsIMemoryReporter)
    24 // ----------------------------------------------------------------------
    // Helper macros for building the static eTLD tables.
    //
    // ETLD_STR_NUM(x) produces the identifier str<x>.  The extra hop through
    // ETLD_STR_NUM_1 makes sure the argument (__LINE__ in practice) is fully
    // macro-expanded before it is token-pasted onto "str".
    #define ETLD_STR_NUM_1(aLine) str##aLine
    #define ETLD_STR_NUM(aLine) ETLD_STR_NUM_1(aLine)
    // Byte offset, inside struct etld_string_list, of the member named after the
    // line on which this macro is expanded.  The |name| argument itself is not
    // used; only the expansion line matters.
    #define ETLD_ENTRY_OFFSET(name) \
      offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
    30 const ETLDEntry nsDomainEntry::entries[] = {
    31 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
    32 #include "etld_data.inc"
    33 #undef ETLD_ENTRY
    34 };
    36 const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
    37   {
    38 #define ETLD_ENTRY(name, ex, wild) name,
    39 #include "etld_data.inc"
    40 #undef ETLD_ENTRY
    41   }
    42 };
    44 // Dummy function to statically ensure that our indices don't overflow
    45 // the storage provided for them.
    46 void
    47 nsDomainEntry::FuncForStaticAsserts(void)
    48 {
    49 #define ETLD_ENTRY(name, ex, wild)                                      \
    50   static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
    51                 "invalid strtab index");
    52 #include "etld_data.inc"
    53 #undef ETLD_ENTRY
    54 }
    // The table-building helpers are not needed past this point; remove all three
    // so they cannot leak into the rest of the translation unit.  Note that the
    // inner pasting helper is spelled ETLD_STR_NUM_1 (with an underscore before
    // the 1).
    #undef ETLD_ENTRY_OFFSET
    #undef ETLD_STR_NUM
    #undef ETLD_STR_NUM_1
    60 // ----------------------------------------------------------------------
    62 static nsEffectiveTLDService *gService = nullptr;
    64 nsEffectiveTLDService::nsEffectiveTLDService()
    65   // We'll probably have to rehash at least once, since nsTHashtable doesn't
    66   // use a perfect hash, but at least we'll save a few rehashes along the way.
    67   // Next optimization here is to precompute the hash using something like
    68   // gperf, but one step at a time.  :-)
    69   : mHash(ArrayLength(nsDomainEntry::entries))
    70 {
    71 }
    73 nsresult
    74 nsEffectiveTLDService::Init()
    75 {
    76   const ETLDEntry *entries = nsDomainEntry::entries;
    78   nsresult rv;
    79   mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
    80   if (NS_FAILED(rv)) return rv;
    82   // Initialize eTLD hash from static array
    83   for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) {
    84     const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index);
    85 #ifdef DEBUG
    86     nsDependentCString name(domain);
    87     nsAutoCString normalizedName(domain);
    88     NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
    89                  "normalization failure!");
    90     NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
    91 #endif
    92     nsDomainEntry *entry = mHash.PutEntry(domain);
    93     NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
    94     entry->SetData(&entries[i]);
    95   }
    97   MOZ_ASSERT(!gService);
    98   gService = this;
    99   RegisterWeakMemoryReporter(this);
   101   return NS_OK;
   102 }
   104 nsEffectiveTLDService::~nsEffectiveTLDService()
   105 {
   106   UnregisterWeakMemoryReporter(this);
   107   gService = nullptr;
   108 }
   110 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
   112 NS_IMETHODIMP
   113 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
   114                                       nsISupports* aData)
   115 {
   116   return MOZ_COLLECT_REPORT(
   117     "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES,
   118     SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
   119     "Memory used by the effective TLD service.");
   120 }
   122 size_t
   123 nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
   124 {
   125   size_t n = aMallocSizeOf(this);
   126   n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf);
   128   // Measurement of the following members may be added later if DMD finds it is
   129   // worthwhile:
   130   // - mIDNService
   132   return n;
   133 }
   135 // External function for dealing with URI's correctly.
   136 // Pulls out the host portion from an nsIURI, and calls through to
   137 // GetPublicSuffixFromHost().
   138 NS_IMETHODIMP
   139 nsEffectiveTLDService::GetPublicSuffix(nsIURI     *aURI,
   140                                        nsACString &aPublicSuffix)
   141 {
   142   NS_ENSURE_ARG_POINTER(aURI);
   144   nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
   145   NS_ENSURE_ARG_POINTER(innerURI);
   147   nsAutoCString host;
   148   nsresult rv = innerURI->GetAsciiHost(host);
   149   if (NS_FAILED(rv)) return rv;
   151   return GetBaseDomainInternal(host, 0, aPublicSuffix);
   152 }
   154 // External function for dealing with URI's correctly.
   155 // Pulls out the host portion from an nsIURI, and calls through to
   156 // GetBaseDomainFromHost().
   157 NS_IMETHODIMP
   158 nsEffectiveTLDService::GetBaseDomain(nsIURI     *aURI,
   159                                      uint32_t    aAdditionalParts,
   160                                      nsACString &aBaseDomain)
   161 {
   162   NS_ENSURE_ARG_POINTER(aURI);
   163   NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
   165   nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
   166   NS_ENSURE_ARG_POINTER(innerURI);
   168   nsAutoCString host;
   169   nsresult rv = innerURI->GetAsciiHost(host);
   170   if (NS_FAILED(rv)) return rv;
   172   return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
   173 }
   175 // External function for dealing with a host string directly: finds the public
   176 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
   177 NS_IMETHODIMP
   178 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
   179                                                nsACString       &aPublicSuffix)
   180 {
   181   // Create a mutable copy of the hostname and normalize it to ACE.
   182   // This will fail if the hostname includes invalid characters.
   183   nsAutoCString normHostname(aHostname);
   184   nsresult rv = NormalizeHostname(normHostname);
   185   if (NS_FAILED(rv)) return rv;
   187   return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
   188 }
   190 // External function for dealing with a host string directly: finds the base
   191 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
   192 // requested. See GetBaseDomainInternal().
   193 NS_IMETHODIMP
   194 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
   195                                              uint32_t          aAdditionalParts,
   196                                              nsACString       &aBaseDomain)
   197 {
   198   NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
   200   // Create a mutable copy of the hostname and normalize it to ACE.
   201   // This will fail if the hostname includes invalid characters.
   202   nsAutoCString normHostname(aHostname);
   203   nsresult rv = NormalizeHostname(normHostname);
   204   if (NS_FAILED(rv)) return rv;
   206   return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
   207 }
   209 NS_IMETHODIMP
   210 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
   211                                         nsACString&       aBaseDomain)
   212 {
   213   // Create a mutable copy of the hostname and normalize it to ACE.
   214   // This will fail if the hostname includes invalid characters.
   215   nsAutoCString normHostname(aHostname);
   216   nsresult rv = NormalizeHostname(normHostname);
   217   NS_ENSURE_SUCCESS(rv, rv);
   219   return GetBaseDomainInternal(normHostname, -1, aBaseDomain);
   220 }
   222 // Finds the base domain for a host, with requested number of additional parts.
   223 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
   224 // if more subdomain parts are requested than are available, or if the hostname
   225 // includes characters that are not valid in a URL. Normalization is performed
   226 // on the host string and the result will be in UTF8.
   227 nsresult
   228 nsEffectiveTLDService::GetBaseDomainInternal(nsCString  &aHostname,
   229                                              int32_t    aAdditionalParts,
   230                                              nsACString &aBaseDomain)
   231 {
   232   if (aHostname.IsEmpty())
   233     return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
   235   // chomp any trailing dot, and keep track of it for later
   236   bool trailingDot = aHostname.Last() == '.';
   237   if (trailingDot)
   238     aHostname.Truncate(aHostname.Length() - 1);
   240   // check the edge cases of the host being '.' or having a second trailing '.',
   241   // since subsequent checks won't catch it.
   242   if (aHostname.IsEmpty() || aHostname.Last() == '.')
   243     return NS_ERROR_INVALID_ARG;
   245   // Check if we're dealing with an IPv4/IPv6 hostname, and return
   246   PRNetAddr addr;
   247   PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
   248   if (result == PR_SUCCESS)
   249     return NS_ERROR_HOST_IS_IP_ADDRESS;
   251   // Walk up the domain tree, most specific to least specific,
   252   // looking for matches at each level.  Note that a given level may
   253   // have multiple attributes (e.g. IsWild() and IsNormal()).
   254   const char *prevDomain = nullptr;
   255   const char *currDomain = aHostname.get();
   256   const char *nextDot = strchr(currDomain, '.');
   257   const char *end = currDomain + aHostname.Length();
   258   const char *eTLD = currDomain;
   259   while (1) {
   260     // sanity check the string we're about to look up: it should not begin with
   261     // a '.'; this would mean the hostname began with a '.' or had an
   262     // embedded '..' sequence.
   263     if (*currDomain == '.')
   264       return NS_ERROR_INVALID_ARG;
   266     // perform the hash lookup.
   267     nsDomainEntry *entry = mHash.GetEntry(currDomain);
   268     if (entry) {
   269       if (entry->IsWild() && prevDomain) {
   270         // wildcard rules imply an eTLD one level inferior to the match.
   271         eTLD = prevDomain;
   272         break;
   274       } else if (entry->IsNormal() || !nextDot) {
   275         // specific match, or we've hit the top domain level
   276         eTLD = currDomain;
   277         break;
   279       } else if (entry->IsException()) {
   280         // exception rules imply an eTLD one level superior to the match.
   281         eTLD = nextDot + 1;
   282         break;
   283       }
   284     }
   286     if (!nextDot) {
   287       // we've hit the top domain level; use it by default.
   288       eTLD = currDomain;
   289       break;
   290     }
   292     prevDomain = currDomain;
   293     currDomain = nextDot + 1;
   294     nextDot = strchr(currDomain, '.');
   295   }
   297   const char *begin, *iter;
   298   if (aAdditionalParts < 0) {
   299     NS_ASSERTION(aAdditionalParts == -1,
   300                  "aAdditionalParts can't be negative and different from -1");
   302     for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++);
   304     if (iter != eTLD) {
   305       iter++;
   306     }
   307     if (iter != eTLD) {
   308       aAdditionalParts = 0;
   309     }
   310   } else {
   311     // count off the number of requested domains.
   312     begin = aHostname.get();
   313     iter = eTLD;
   315     while (1) {
   316       if (iter == begin)
   317         break;
   319       if (*(--iter) == '.' && aAdditionalParts-- == 0) {
   320         ++iter;
   321         ++aAdditionalParts;
   322         break;
   323       }
   324     }
   325   }
   327   if (aAdditionalParts != 0)
   328     return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
   330   aBaseDomain = Substring(iter, end);
   331   // add on the trailing dot, if applicable
   332   if (trailingDot)
   333     aBaseDomain.Append('.');
   335   return NS_OK;
   336 }
   338 // Normalizes the given hostname, component by component.  ASCII/ACE
   339 // components are lower-cased, and UTF-8 components are normalized per
   340 // RFC 3454 and converted to ACE.
   341 nsresult
   342 nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
   343 {
   344   if (!IsASCII(aHostname)) {
   345     nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
   346     if (NS_FAILED(rv))
   347       return rv;
   348   }
   350   ToLowerCase(aHostname);
   351   return NS_OK;
   352 }

mercurial