netwerk/dns/nsEffectiveTLDService.cpp

Wed, 31 Dec 2014 06:55:46 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:55:46 +0100
changeset 1
ca08bd8f51b2
permissions
-rw-r--r--

Added tag TORBROWSER_REPLICA for changeset 6474c204b198

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
michael@0 3 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 // This service reads a file of rules describing TLD-like domain names. For a
michael@0 8 // complete description of the expected file format and parsing rules, see
michael@0 9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
michael@0 10
michael@0 11 #include "mozilla/ArrayUtils.h"
michael@0 12 #include "mozilla/MemoryReporting.h"
michael@0 13
michael@0 14 #include "nsEffectiveTLDService.h"
michael@0 15 #include "nsIIDNService.h"
michael@0 16 #include "nsNetUtil.h"
michael@0 17 #include "prnetdb.h"
michael@0 18
michael@0 19 using namespace mozilla;
michael@0 20
michael@0 21 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
michael@0 22 nsIMemoryReporter)
michael@0 23
michael@0 24 // ----------------------------------------------------------------------
michael@0 25
// Helper macros used while expanding etld_data.inc.
//
// ETLD_STR_NUM(n) produces the identifier "str<n>". The extra level of
// indirection through ETLD_STR_NUM_1 lets an argument such as __LINE__ be
// macro-expanded before it is pasted onto "str".
//
// ETLD_ENTRY_OFFSET(name) evaluates to the byte offset, within
// struct etld_string_list, of the member named after the line on which the
// macro is expanded. The argument itself is not used: the member is selected
// purely by line number.
#define ETLD_STR_NUM_1(aLine) str##aLine
#define ETLD_STR_NUM(aLine) ETLD_STR_NUM_1(aLine)
#define ETLD_ENTRY_OFFSET(aName) \
  offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
michael@0 29
michael@0 30 const ETLDEntry nsDomainEntry::entries[] = {
michael@0 31 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
michael@0 32 #include "etld_data.inc"
michael@0 33 #undef ETLD_ENTRY
michael@0 34 };
michael@0 35
michael@0 36 const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
michael@0 37 {
michael@0 38 #define ETLD_ENTRY(name, ex, wild) name,
michael@0 39 #include "etld_data.inc"
michael@0 40 #undef ETLD_ENTRY
michael@0 41 }
michael@0 42 };
michael@0 43
michael@0 44 // Dummy function to statically ensure that our indices don't overflow
michael@0 45 // the storage provided for them.
michael@0 46 void
michael@0 47 nsDomainEntry::FuncForStaticAsserts(void)
michael@0 48 {
michael@0 49 #define ETLD_ENTRY(name, ex, wild) \
michael@0 50 static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
michael@0 51 "invalid strtab index");
michael@0 52 #include "etld_data.inc"
michael@0 53 #undef ETLD_ENTRY
michael@0 54 }
michael@0 55
// The helper macros are only needed while expanding etld_data.inc; undefine
// all three so they do not leak into the rest of this translation unit.
// (The innermost helper is spelled ETLD_STR_NUM_1, with an underscore before
// the digit.)
#undef ETLD_ENTRY_OFFSET
#undef ETLD_STR_NUM
#undef ETLD_STR_NUM_1
michael@0 59
michael@0 60 // ----------------------------------------------------------------------
michael@0 61
michael@0 62 static nsEffectiveTLDService *gService = nullptr;
michael@0 63
michael@0 64 nsEffectiveTLDService::nsEffectiveTLDService()
michael@0 65 // We'll probably have to rehash at least once, since nsTHashtable doesn't
michael@0 66 // use a perfect hash, but at least we'll save a few rehashes along the way.
michael@0 67 // Next optimization here is to precompute the hash using something like
michael@0 68 // gperf, but one step at a time. :-)
michael@0 69 : mHash(ArrayLength(nsDomainEntry::entries))
michael@0 70 {
michael@0 71 }
michael@0 72
michael@0 73 nsresult
michael@0 74 nsEffectiveTLDService::Init()
michael@0 75 {
michael@0 76 const ETLDEntry *entries = nsDomainEntry::entries;
michael@0 77
michael@0 78 nsresult rv;
michael@0 79 mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
michael@0 80 if (NS_FAILED(rv)) return rv;
michael@0 81
michael@0 82 // Initialize eTLD hash from static array
michael@0 83 for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) {
michael@0 84 const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index);
michael@0 85 #ifdef DEBUG
michael@0 86 nsDependentCString name(domain);
michael@0 87 nsAutoCString normalizedName(domain);
michael@0 88 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
michael@0 89 "normalization failure!");
michael@0 90 NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
michael@0 91 #endif
michael@0 92 nsDomainEntry *entry = mHash.PutEntry(domain);
michael@0 93 NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
michael@0 94 entry->SetData(&entries[i]);
michael@0 95 }
michael@0 96
michael@0 97 MOZ_ASSERT(!gService);
michael@0 98 gService = this;
michael@0 99 RegisterWeakMemoryReporter(this);
michael@0 100
michael@0 101 return NS_OK;
michael@0 102 }
michael@0 103
michael@0 104 nsEffectiveTLDService::~nsEffectiveTLDService()
michael@0 105 {
michael@0 106 UnregisterWeakMemoryReporter(this);
michael@0 107 gService = nullptr;
michael@0 108 }
michael@0 109
michael@0 110 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
michael@0 111
michael@0 112 NS_IMETHODIMP
michael@0 113 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
michael@0 114 nsISupports* aData)
michael@0 115 {
michael@0 116 return MOZ_COLLECT_REPORT(
michael@0 117 "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES,
michael@0 118 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
michael@0 119 "Memory used by the effective TLD service.");
michael@0 120 }
michael@0 121
michael@0 122 size_t
michael@0 123 nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
michael@0 124 {
michael@0 125 size_t n = aMallocSizeOf(this);
michael@0 126 n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf);
michael@0 127
michael@0 128 // Measurement of the following members may be added later if DMD finds it is
michael@0 129 // worthwhile:
michael@0 130 // - mIDNService
michael@0 131
michael@0 132 return n;
michael@0 133 }
michael@0 134
michael@0 135 // External function for dealing with URI's correctly.
michael@0 136 // Pulls out the host portion from an nsIURI, and calls through to
michael@0 137 // GetPublicSuffixFromHost().
michael@0 138 NS_IMETHODIMP
michael@0 139 nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI,
michael@0 140 nsACString &aPublicSuffix)
michael@0 141 {
michael@0 142 NS_ENSURE_ARG_POINTER(aURI);
michael@0 143
michael@0 144 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
michael@0 145 NS_ENSURE_ARG_POINTER(innerURI);
michael@0 146
michael@0 147 nsAutoCString host;
michael@0 148 nsresult rv = innerURI->GetAsciiHost(host);
michael@0 149 if (NS_FAILED(rv)) return rv;
michael@0 150
michael@0 151 return GetBaseDomainInternal(host, 0, aPublicSuffix);
michael@0 152 }
michael@0 153
michael@0 154 // External function for dealing with URI's correctly.
michael@0 155 // Pulls out the host portion from an nsIURI, and calls through to
michael@0 156 // GetBaseDomainFromHost().
michael@0 157 NS_IMETHODIMP
michael@0 158 nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI,
michael@0 159 uint32_t aAdditionalParts,
michael@0 160 nsACString &aBaseDomain)
michael@0 161 {
michael@0 162 NS_ENSURE_ARG_POINTER(aURI);
michael@0 163 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
michael@0 164
michael@0 165 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
michael@0 166 NS_ENSURE_ARG_POINTER(innerURI);
michael@0 167
michael@0 168 nsAutoCString host;
michael@0 169 nsresult rv = innerURI->GetAsciiHost(host);
michael@0 170 if (NS_FAILED(rv)) return rv;
michael@0 171
michael@0 172 return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
michael@0 173 }
michael@0 174
michael@0 175 // External function for dealing with a host string directly: finds the public
michael@0 176 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
michael@0 177 NS_IMETHODIMP
michael@0 178 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
michael@0 179 nsACString &aPublicSuffix)
michael@0 180 {
michael@0 181 // Create a mutable copy of the hostname and normalize it to ACE.
michael@0 182 // This will fail if the hostname includes invalid characters.
michael@0 183 nsAutoCString normHostname(aHostname);
michael@0 184 nsresult rv = NormalizeHostname(normHostname);
michael@0 185 if (NS_FAILED(rv)) return rv;
michael@0 186
michael@0 187 return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
michael@0 188 }
michael@0 189
michael@0 190 // External function for dealing with a host string directly: finds the base
michael@0 191 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
michael@0 192 // requested. See GetBaseDomainInternal().
michael@0 193 NS_IMETHODIMP
michael@0 194 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
michael@0 195 uint32_t aAdditionalParts,
michael@0 196 nsACString &aBaseDomain)
michael@0 197 {
michael@0 198 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
michael@0 199
michael@0 200 // Create a mutable copy of the hostname and normalize it to ACE.
michael@0 201 // This will fail if the hostname includes invalid characters.
michael@0 202 nsAutoCString normHostname(aHostname);
michael@0 203 nsresult rv = NormalizeHostname(normHostname);
michael@0 204 if (NS_FAILED(rv)) return rv;
michael@0 205
michael@0 206 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
michael@0 207 }
michael@0 208
michael@0 209 NS_IMETHODIMP
michael@0 210 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
michael@0 211 nsACString& aBaseDomain)
michael@0 212 {
michael@0 213 // Create a mutable copy of the hostname and normalize it to ACE.
michael@0 214 // This will fail if the hostname includes invalid characters.
michael@0 215 nsAutoCString normHostname(aHostname);
michael@0 216 nsresult rv = NormalizeHostname(normHostname);
michael@0 217 NS_ENSURE_SUCCESS(rv, rv);
michael@0 218
michael@0 219 return GetBaseDomainInternal(normHostname, -1, aBaseDomain);
michael@0 220 }
michael@0 221
michael@0 222 // Finds the base domain for a host, with requested number of additional parts.
michael@0 223 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
michael@0 224 // if more subdomain parts are requested than are available, or if the hostname
michael@0 225 // includes characters that are not valid in a URL. Normalization is performed
michael@0 226 // on the host string and the result will be in UTF8.
michael@0 227 nsresult
michael@0 228 nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
michael@0 229 int32_t aAdditionalParts,
michael@0 230 nsACString &aBaseDomain)
michael@0 231 {
michael@0 232 if (aHostname.IsEmpty())
michael@0 233 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
michael@0 234
michael@0 235 // chomp any trailing dot, and keep track of it for later
michael@0 236 bool trailingDot = aHostname.Last() == '.';
michael@0 237 if (trailingDot)
michael@0 238 aHostname.Truncate(aHostname.Length() - 1);
michael@0 239
michael@0 240 // check the edge cases of the host being '.' or having a second trailing '.',
michael@0 241 // since subsequent checks won't catch it.
michael@0 242 if (aHostname.IsEmpty() || aHostname.Last() == '.')
michael@0 243 return NS_ERROR_INVALID_ARG;
michael@0 244
michael@0 245 // Check if we're dealing with an IPv4/IPv6 hostname, and return
michael@0 246 PRNetAddr addr;
michael@0 247 PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
michael@0 248 if (result == PR_SUCCESS)
michael@0 249 return NS_ERROR_HOST_IS_IP_ADDRESS;
michael@0 250
michael@0 251 // Walk up the domain tree, most specific to least specific,
michael@0 252 // looking for matches at each level. Note that a given level may
michael@0 253 // have multiple attributes (e.g. IsWild() and IsNormal()).
michael@0 254 const char *prevDomain = nullptr;
michael@0 255 const char *currDomain = aHostname.get();
michael@0 256 const char *nextDot = strchr(currDomain, '.');
michael@0 257 const char *end = currDomain + aHostname.Length();
michael@0 258 const char *eTLD = currDomain;
michael@0 259 while (1) {
michael@0 260 // sanity check the string we're about to look up: it should not begin with
michael@0 261 // a '.'; this would mean the hostname began with a '.' or had an
michael@0 262 // embedded '..' sequence.
michael@0 263 if (*currDomain == '.')
michael@0 264 return NS_ERROR_INVALID_ARG;
michael@0 265
michael@0 266 // perform the hash lookup.
michael@0 267 nsDomainEntry *entry = mHash.GetEntry(currDomain);
michael@0 268 if (entry) {
michael@0 269 if (entry->IsWild() && prevDomain) {
michael@0 270 // wildcard rules imply an eTLD one level inferior to the match.
michael@0 271 eTLD = prevDomain;
michael@0 272 break;
michael@0 273
michael@0 274 } else if (entry->IsNormal() || !nextDot) {
michael@0 275 // specific match, or we've hit the top domain level
michael@0 276 eTLD = currDomain;
michael@0 277 break;
michael@0 278
michael@0 279 } else if (entry->IsException()) {
michael@0 280 // exception rules imply an eTLD one level superior to the match.
michael@0 281 eTLD = nextDot + 1;
michael@0 282 break;
michael@0 283 }
michael@0 284 }
michael@0 285
michael@0 286 if (!nextDot) {
michael@0 287 // we've hit the top domain level; use it by default.
michael@0 288 eTLD = currDomain;
michael@0 289 break;
michael@0 290 }
michael@0 291
michael@0 292 prevDomain = currDomain;
michael@0 293 currDomain = nextDot + 1;
michael@0 294 nextDot = strchr(currDomain, '.');
michael@0 295 }
michael@0 296
michael@0 297 const char *begin, *iter;
michael@0 298 if (aAdditionalParts < 0) {
michael@0 299 NS_ASSERTION(aAdditionalParts == -1,
michael@0 300 "aAdditionalParts can't be negative and different from -1");
michael@0 301
michael@0 302 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++);
michael@0 303
michael@0 304 if (iter != eTLD) {
michael@0 305 iter++;
michael@0 306 }
michael@0 307 if (iter != eTLD) {
michael@0 308 aAdditionalParts = 0;
michael@0 309 }
michael@0 310 } else {
michael@0 311 // count off the number of requested domains.
michael@0 312 begin = aHostname.get();
michael@0 313 iter = eTLD;
michael@0 314
michael@0 315 while (1) {
michael@0 316 if (iter == begin)
michael@0 317 break;
michael@0 318
michael@0 319 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
michael@0 320 ++iter;
michael@0 321 ++aAdditionalParts;
michael@0 322 break;
michael@0 323 }
michael@0 324 }
michael@0 325 }
michael@0 326
michael@0 327 if (aAdditionalParts != 0)
michael@0 328 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
michael@0 329
michael@0 330 aBaseDomain = Substring(iter, end);
michael@0 331 // add on the trailing dot, if applicable
michael@0 332 if (trailingDot)
michael@0 333 aBaseDomain.Append('.');
michael@0 334
michael@0 335 return NS_OK;
michael@0 336 }
michael@0 337
michael@0 338 // Normalizes the given hostname, component by component. ASCII/ACE
michael@0 339 // components are lower-cased, and UTF-8 components are normalized per
michael@0 340 // RFC 3454 and converted to ACE.
michael@0 341 nsresult
michael@0 342 nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
michael@0 343 {
michael@0 344 if (!IsASCII(aHostname)) {
michael@0 345 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
michael@0 346 if (NS_FAILED(rv))
michael@0 347 return rv;
michael@0 348 }
michael@0 349
michael@0 350 ToLowerCase(aHostname);
michael@0 351 return NS_OK;
michael@0 352 }

mercurial