netwerk/dns/nsEffectiveTLDService.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/netwerk/dns/nsEffectiveTLDService.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,352 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* vim: set ts=8 sts=2 et sw=2 tw=80: */
     1.6 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +// This service reads a file of rules describing TLD-like domain names.  For a
    1.11 +// complete description of the expected file format and parsing rules, see
    1.12 +// http://wiki.mozilla.org/Gecko:Effective_TLD_Service
    1.13 +
    1.14 +#include "mozilla/ArrayUtils.h"
    1.15 +#include "mozilla/MemoryReporting.h"
    1.16 +
    1.17 +#include "nsEffectiveTLDService.h"
    1.18 +#include "nsIIDNService.h"
    1.19 +#include "nsNetUtil.h"
    1.20 +#include "prnetdb.h"
    1.21 +
    1.22 +using namespace mozilla;
    1.23 +
    1.24 +NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
    1.25 +                  nsIMemoryReporter)  // interface list for nsEffectiveTLDService
    1.26 +
    1.27 +// ----------------------------------------------------------------------
    1.28 +
    1.29 +#define ETLD_STR_NUM_1(line) str##line  // pastes "str" and the line number
    1.30 +#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)  // expands __LINE__ before pasting
    1.31 +#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
    1.32 +// Note: ETLD_ENTRY_OFFSET ignores `name`; the member is selected by __LINE__.
    1.33 +const ETLDEntry nsDomainEntry::entries[] = {  // filled by ETLD_ENTRY() via etld_data.inc
    1.34 +#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
    1.35 +#include "etld_data.inc"
    1.36 +#undef ETLD_ENTRY
    1.37 +};
    1.38 +
    1.39 +const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
    1.40 +  {
    1.41 +#define ETLD_ENTRY(name, ex, wild) name,  // only the name string is emitted here
    1.42 +#include "etld_data.inc"
    1.43 +#undef ETLD_ENTRY
    1.44 +  }
    1.45 +};
    1.46 +
    1.47 +// Dummy function that exists only to host compile-time checks: every entry's
    1.48 +// string-table offset must be smaller than 1 << ETLD_ENTRY_N_INDEX_BITS.
    1.49 +void
    1.50 +nsDomainEntry::FuncForStaticAsserts(void)
    1.51 +{
    1.52 +#define ETLD_ENTRY(name, ex, wild)                                      \
    1.53 +  static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
    1.54 +                "invalid strtab index");
    1.55 +#include "etld_data.inc"
    1.56 +#undef ETLD_ENTRY
    1.57 +}
    1.58 +
    1.59 +#undef ETLD_ENTRY_OFFSET
    1.60 +#undef ETLD_STR_NUM
    1.61 +#undef ETLD_STR_NUM_1  // the token-pasting helper behind ETLD_STR_NUM
    1.62 +
    1.63 +// ----------------------------------------------------------------------
    1.64 +
    1.65 +static nsEffectiveTLDService *gService = nullptr;  // set in Init(), reset in the destructor
    1.66 +
    1.67 +nsEffectiveTLDService::nsEffectiveTLDService()
    1.68 +  // Construct the hash with the number of static entries.  We'll probably
    1.69 +  // still rehash at least once, since nsTHashtable doesn't use a perfect hash,
    1.70 +  // but at least we'll save a few rehashes along the way.  The next
    1.71 +  // optimization would be to precompute the hash using something like gperf.
    1.72 +  : mHash(ArrayLength(nsDomainEntry::entries))
    1.73 +{
    1.74 +}
    1.75 +
    1.76 +nsresult
    1.77 +nsEffectiveTLDService::Init()
    1.78 +{
    1.79 +  const ETLDEntry *entries = nsDomainEntry::entries;
    1.80 +
    1.81 +  nsresult rv;
    1.82 +  mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);  // used by NormalizeHostname()
    1.83 +  if (NS_FAILED(rv)) return rv;
    1.84 +
    1.85 +  // Populate the eTLD hash from the static array, keyed by domain name.
    1.86 +  for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) {
    1.87 +    const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index);
    1.88 +#ifdef DEBUG
    1.89 +    nsDependentCString name(domain);  // debug only: names must already be normalized
    1.90 +    nsAutoCString normalizedName(domain);
    1.91 +    NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
    1.92 +                 "normalization failure!");
    1.93 +    NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
    1.94 +#endif
    1.95 +    nsDomainEntry *entry = mHash.PutEntry(domain);
    1.96 +    NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
    1.97 +    entry->SetData(&entries[i]);  // hash entry refers to the static record
    1.98 +  }
    1.99 +
   1.100 +  MOZ_ASSERT(!gService);  // no other instance may be registered yet
   1.101 +  gService = this;
   1.102 +  RegisterWeakMemoryReporter(this);  // undone in the destructor
   1.103 +
   1.104 +  return NS_OK;
   1.105 +}
   1.106 +
   1.107 +nsEffectiveTLDService::~nsEffectiveTLDService()
   1.108 +{
   1.109 +  UnregisterWeakMemoryReporter(this);  // pairs with the registration in Init()
   1.110 +  gService = nullptr;  // clears the pointer stored by Init()
   1.111 +}
   1.112 +
   1.113 +MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
   1.114 +// Reports the size measured by SizeOfIncludingThis() under a single path.
   1.115 +NS_IMETHODIMP
   1.116 +nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
   1.117 +                                      nsISupports* aData)
   1.118 +{
   1.119 +  return MOZ_COLLECT_REPORT(
   1.120 +    "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES,
   1.121 +    SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
   1.122 +    "Memory used by the effective TLD service.");
   1.123 +}
   1.124 +
   1.125 +size_t
   1.126 +nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
   1.127 +{
   1.128 +  // Measure this object first, then whatever the hash table reports for itself.
   1.129 +  const size_t selfSize = aMallocSizeOf(this);
   1.130 +  const size_t hashSize = mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf);
   1.131 +
   1.132 +  // Not measured here; may be added later if DMD finds it is worthwhile:
   1.133 +  // - mIDNService
   1.134 +
   1.135 +  return selfSize + hashSize;
   1.136 +}
   1.137 +
   1.138 +// URI-based entry point for the public suffix lookup.  Takes the ASCII host
   1.139 +// of the innermost URI and hands it to GetBaseDomainInternal() with zero
   1.140 +// additional parts requested.
   1.141 +NS_IMETHODIMP
   1.142 +nsEffectiveTLDService::GetPublicSuffix(nsIURI     *aURI,
   1.143 +                                       nsACString &aPublicSuffix)
   1.144 +{
   1.145 +  NS_ENSURE_ARG_POINTER(aURI);
   1.146 +
   1.147 +  nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
   1.148 +  NS_ENSURE_ARG_POINTER(innerURI);
   1.149 +
   1.150 +  nsAutoCString asciiHost;
   1.151 +  const nsresult status = innerURI->GetAsciiHost(asciiHost);
   1.152 +  // A failed host lookup is returned as-is; otherwise do the suffix lookup.
   1.153 +  return NS_FAILED(status) ? status
   1.154 +                           : GetBaseDomainInternal(asciiHost, 0, aPublicSuffix);
   1.155 +}
   1.156 +
   1.157 +// URI-based entry point for the base domain lookup: the public suffix plus
   1.158 +// aAdditionalParts + 1 labels.  Pulls the ASCII host out of the innermost URI
   1.159 +// and calls through to GetBaseDomainInternal().
   1.160 +NS_IMETHODIMP
   1.161 +nsEffectiveTLDService::GetBaseDomain(nsIURI     *aURI,
   1.162 +                                     uint32_t    aAdditionalParts,
   1.163 +                                     nsACString &aBaseDomain)
   1.164 +{
   1.165 +  NS_ENSURE_ARG_POINTER(aURI);
   1.166 +  // aAdditionalParts + 1 is passed on as an int32_t, so it must stay positive.
   1.167 +  NS_ENSURE_TRUE( ((int32_t)(aAdditionalParts + 1)) > 0, NS_ERROR_INVALID_ARG);
   1.168 +
   1.169 +  nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
   1.170 +  NS_ENSURE_ARG_POINTER(innerURI);
   1.171 +  nsAutoCString host;
   1.172 +  nsresult rv = innerURI->GetAsciiHost(host);
   1.173 +  if (NS_FAILED(rv)) return rv;
   1.174 +
   1.175 +  return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
   1.176 +}
   1.177 +
   1.178 +// Host-string entry point for the public suffix lookup: finds the public
   1.179 +// suffix (e.g. co.uk) of aHostname. See GetBaseDomainInternal().
   1.180 +NS_IMETHODIMP
   1.181 +nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
   1.182 +                                               nsACString       &aPublicSuffix)
   1.183 +{
   1.184 +  // Normalize a private, mutable copy of the hostname to ACE.  This step
   1.185 +  // fails if the hostname includes invalid characters.
   1.186 +  nsAutoCString hostCopy(aHostname);
   1.187 +  const nsresult status = NormalizeHostname(hostCopy);
   1.188 +
   1.189 +  return NS_FAILED(status) ? status
   1.190 +                           : GetBaseDomainInternal(hostCopy, 0, aPublicSuffix);
   1.191 +}
   1.192 +
   1.193 +// External function for dealing with a host string directly: finds the base
   1.194 +// domain (e.g. example.co.uk) for the given hostname, i.e. the public suffix
   1.195 +// plus aAdditionalParts + 1 labels. See GetBaseDomainInternal().
   1.196 +NS_IMETHODIMP
   1.197 +nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
   1.198 +                                             uint32_t          aAdditionalParts,
   1.199 +                                             nsACString       &aBaseDomain)
   1.200 +{
   1.201 +  // aAdditionalParts + 1 is passed on as an int32_t, so it must stay positive.
   1.202 +  NS_ENSURE_TRUE( ((int32_t)(aAdditionalParts + 1)) > 0, NS_ERROR_INVALID_ARG);
   1.203 +
   1.204 +  // Normalize a mutable copy to ACE; this fails on invalid characters.
   1.205 +  nsAutoCString normHostname(aHostname);
   1.206 +  nsresult rv = NormalizeHostname(normHostname);
   1.207 +  if (NS_FAILED(rv)) return rv;
   1.208 +
   1.209 +  return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
   1.210 +}
   1.211 +
   1.212 +NS_IMETHODIMP
   1.213 +nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
   1.214 +                                        nsACString&       aBaseDomain)
   1.215 +{
   1.216 +  // Work on a normalized (ACE) copy; invalid characters make this step fail.
   1.217 +  nsAutoCString hostCopy(aHostname);
   1.218 +  const nsresult rv = NormalizeHostname(hostCopy);
   1.219 +  NS_ENSURE_SUCCESS(rv, rv);
   1.220 +
   1.221 +  // -1 selects the "drop the leftmost label" mode of GetBaseDomainInternal().
   1.222 +  return GetBaseDomainInternal(hostCopy, -1, aBaseDomain);
   1.223 +}
   1.224 +
   1.225 +// Finds the base domain for a host that the caller has already normalized.
   1.226 +// aAdditionalParts is the number of labels to keep in front of the eTLD (0
   1.227 +// yields the eTLD itself); -1 strips just the leftmost label.  Fails if the
   1.228 +// host is empty or malformed, is an IPv4/IPv6 address, or has too few labels.
   1.229 +// aHostname loses any trailing dot; the result gets it appended again.
   1.230 +nsresult
   1.231 +nsEffectiveTLDService::GetBaseDomainInternal(nsCString  &aHostname,
   1.232 +                                             int32_t    aAdditionalParts,
   1.233 +                                             nsACString &aBaseDomain)
   1.234 +{
   1.235 +  if (aHostname.IsEmpty())
   1.236 +    return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
   1.237 +
   1.238 +  // chomp any trailing dot, and keep track of it for later
   1.239 +  bool trailingDot = aHostname.Last() == '.';
   1.240 +  if (trailingDot)
   1.241 +    aHostname.Truncate(aHostname.Length() - 1);
   1.242 +
   1.243 +  // check the edge cases of the host being '.' or having a second trailing '.',
   1.244 +  // since subsequent checks won't catch it.
   1.245 +  if (aHostname.IsEmpty() || aHostname.Last() == '.')
   1.246 +    return NS_ERROR_INVALID_ARG;
   1.247 +
   1.248 +  // If the host parses as an IPv4/IPv6 address, fail: it has no base domain.
   1.249 +  PRNetAddr addr;
   1.250 +  PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
   1.251 +  if (result == PR_SUCCESS)
   1.252 +    return NS_ERROR_HOST_IS_IP_ADDRESS;
   1.253 +
   1.254 +  // Walk up the domain tree, most specific to least specific,
   1.255 +  // looking for matches at each level.  Note that a given level may
   1.256 +  // have multiple attributes (e.g. IsWild() and IsNormal()).
   1.257 +  const char *prevDomain = nullptr;
   1.258 +  const char *currDomain = aHostname.get();
   1.259 +  const char *nextDot = strchr(currDomain, '.');
   1.260 +  const char *end = currDomain + aHostname.Length();
   1.261 +  const char *eTLD = currDomain;
   1.262 +  while (1) {
   1.263 +    // sanity check the string we're about to look up: it should not begin with
   1.264 +    // a '.'; this would mean the hostname began with a '.' or had an
   1.265 +    // embedded '..' sequence.
   1.266 +    if (*currDomain == '.')
   1.267 +      return NS_ERROR_INVALID_ARG;
   1.268 +
   1.269 +    // perform the hash lookup.
   1.270 +    nsDomainEntry *entry = mHash.GetEntry(currDomain);
   1.271 +    if (entry) {
   1.272 +      if (entry->IsWild() && prevDomain) {
   1.273 +        // wildcard rules imply an eTLD one level inferior to the match.
   1.274 +        eTLD = prevDomain;
   1.275 +        break;
   1.276 +
   1.277 +      } else if (entry->IsNormal() || !nextDot) {
   1.278 +        // specific match, or we've hit the top domain level
   1.279 +        eTLD = currDomain;
   1.280 +        break;
   1.281 +
   1.282 +      } else if (entry->IsException()) {
   1.283 +        // exception rules imply an eTLD one level superior to the match.
   1.284 +        eTLD = nextDot + 1;
   1.285 +        break;
   1.286 +      }
   1.287 +    }
   1.288 +
   1.289 +    if (!nextDot) {
   1.290 +      // we've hit the top domain level; use it by default.
   1.291 +      eTLD = currDomain;
   1.292 +      break;
   1.293 +    }
   1.294 +
   1.295 +    prevDomain = currDomain;
   1.296 +    currDomain = nextDot + 1;
   1.297 +    nextDot = strchr(currDomain, '.');
   1.298 +  }
   1.299 +
   1.300 +  const char *begin, *iter;
   1.301 +  if (aAdditionalParts < 0) {
   1.302 +    NS_ASSERTION(aAdditionalParts == -1,
   1.303 +                 "aAdditionalParts can't be negative and different from -1");
   1.304 +
   1.305 +    for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++);  // find first dot, or hit the eTLD
   1.306 +
   1.307 +    if (iter != eTLD) {
   1.308 +      iter++;  // step past that dot
   1.309 +    }
   1.310 +    if (iter != eTLD) {
   1.311 +      aAdditionalParts = 0;  // more than the eTLD remains: let the check below pass
   1.312 +    }
   1.313 +  } else {
   1.314 +    // count off the number of requested domains.
   1.315 +    begin = aHostname.get();
   1.316 +    iter = eTLD;
   1.317 +
   1.318 +    while (1) {
   1.319 +      if (iter == begin)
   1.320 +        break;
   1.321 +
   1.322 +      if (*(--iter) == '.' && aAdditionalParts-- == 0) {
   1.323 +        ++iter;  // point just after the dot that ended the count
   1.324 +        ++aAdditionalParts;  // undo the last decrement so the count ends at 0
   1.325 +        break;
   1.326 +      }
   1.327 +    }
   1.328 +  }
   1.329 +
   1.330 +  if (aAdditionalParts != 0)
   1.331 +    return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
   1.332 +
   1.333 +  aBaseDomain = Substring(iter, end);
   1.334 +  // add on the trailing dot, if applicable
   1.335 +  if (trailingDot)
   1.336 +    aBaseDomain.Append('.');
   1.337 +
   1.338 +  return NS_OK;
   1.339 +}
   1.340 +
   1.341 +// Normalizes the given hostname in place.  A hostname containing non-ASCII
   1.342 +// characters is first converted to ACE by the IDN service (a failed
   1.343 +// conversion is returned to the caller); the result is then lower-cased.
   1.344 +nsresult
   1.345 +nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
   1.346 +{
   1.347 +  if (!IsASCII(aHostname)) {
   1.348 +    const nsresult status = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
   1.349 +    if (NS_FAILED(status)) {
   1.350 +      return status;
   1.351 +    }
   1.352 +  }
   1.353 +  ToLowerCase(aHostname);
   1.354 +  return NS_OK;
   1.355 +}

mercurial