/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// This service reads a file of rules describing TLD-like domain names. For a
// complete description of the expected file format and parsing rules, see
// http://wiki.mozilla.org/Gecko:Effective_TLD_Service

#include "mozilla/ArrayUtils.h"
#include "mozilla/MemoryReporting.h"

#include "nsEffectiveTLDService.h"
#include "nsIIDNService.h"
#include "nsNetUtil.h"
#include "prnetdb.h"

using namespace mozilla;

NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
                  nsIMemoryReporter)

// ----------------------------------------------------------------------

// Helper macros for building the static eTLD tables below.
//
// ETLD_STR_NUM(line) pastes "str" and the (macro-expanded) line number into a
// single identifier, e.g. str42. The extra ETLD_STR_NUM_1 level is what lets
// __LINE__ be expanded to its numeric value before the ## paste happens.
//
// ETLD_ENTRY_OFFSET(name) evaluates to the byte offset of the member
// str<__LINE__> inside struct etld_string_list. The |name| argument is not
// used by the expansion. __LINE__ takes the line on which the macro is
// expanded, i.e. the line of the ETLD_ENTRY() use inside etld_data.inc.
// NOTE(review): struct etld_string_list is declared outside this file
// (presumably in nsEffectiveTLDService.h); its member names must be generated
// from the same etld_data.inc line numbers -- confirm against the header.
#define ETLD_STR_NUM_1(line) str##line
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))

// One ETLDEntry initializer per ETLD_ENTRY() line in etld_data.inc: the
// string-table offset of the entry's name followed by the |ex| and |wild|
// values given on that line.
const ETLDEntry nsDomainEntry::entries[] = {
#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
#include "etld_data.inc"
#undef ETLD_ENTRY
};

// The string table itself: the same etld_data.inc is included a second time,
// now with ETLD_ENTRY() expanding to just the entry's name, so the names are
// emitted in the same order as the entries above.
const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
  {
#define ETLD_ENTRY(name, ex, wild) name,
#include "etld_data.inc"
#undef ETLD_ENTRY
  }
};

// Dummy function to statically ensure that our indices don't overflow
// the storage provided for them.
1.49 +void 1.50 +nsDomainEntry::FuncForStaticAsserts(void) 1.51 +{ 1.52 +#define ETLD_ENTRY(name, ex, wild) \ 1.53 + static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \ 1.54 + "invalid strtab index"); 1.55 +#include "etld_data.inc" 1.56 +#undef ETLD_ENTRY 1.57 +} 1.58 + 1.59 +#undef ETLD_ENTRY_OFFSET 1.60 +#undef ETLD_STR_NUM 1.61 +#undef ETLD_STR_NUM1 1.62 + 1.63 +// ---------------------------------------------------------------------- 1.64 + 1.65 +static nsEffectiveTLDService *gService = nullptr; 1.66 + 1.67 +nsEffectiveTLDService::nsEffectiveTLDService() 1.68 + // We'll probably have to rehash at least once, since nsTHashtable doesn't 1.69 + // use a perfect hash, but at least we'll save a few rehashes along the way. 1.70 + // Next optimization here is to precompute the hash using something like 1.71 + // gperf, but one step at a time. :-) 1.72 + : mHash(ArrayLength(nsDomainEntry::entries)) 1.73 +{ 1.74 +} 1.75 + 1.76 +nsresult 1.77 +nsEffectiveTLDService::Init() 1.78 +{ 1.79 + const ETLDEntry *entries = nsDomainEntry::entries; 1.80 + 1.81 + nsresult rv; 1.82 + mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); 1.83 + if (NS_FAILED(rv)) return rv; 1.84 + 1.85 + // Initialize eTLD hash from static array 1.86 + for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) { 1.87 + const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index); 1.88 +#ifdef DEBUG 1.89 + nsDependentCString name(domain); 1.90 + nsAutoCString normalizedName(domain); 1.91 + NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)), 1.92 + "normalization failure!"); 1.93 + NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!"); 1.94 +#endif 1.95 + nsDomainEntry *entry = mHash.PutEntry(domain); 1.96 + NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY); 1.97 + entry->SetData(&entries[i]); 1.98 + } 1.99 + 1.100 + MOZ_ASSERT(!gService); 1.101 + gService = this; 1.102 + RegisterWeakMemoryReporter(this); 1.103 + 1.104 + return 
NS_OK; 1.105 +} 1.106 + 1.107 +nsEffectiveTLDService::~nsEffectiveTLDService() 1.108 +{ 1.109 + UnregisterWeakMemoryReporter(this); 1.110 + gService = nullptr; 1.111 +} 1.112 + 1.113 +MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf) 1.114 + 1.115 +NS_IMETHODIMP 1.116 +nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport, 1.117 + nsISupports* aData) 1.118 +{ 1.119 + return MOZ_COLLECT_REPORT( 1.120 + "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES, 1.121 + SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf), 1.122 + "Memory used by the effective TLD service."); 1.123 +} 1.124 + 1.125 +size_t 1.126 +nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) 1.127 +{ 1.128 + size_t n = aMallocSizeOf(this); 1.129 + n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf); 1.130 + 1.131 + // Measurement of the following members may be added later if DMD finds it is 1.132 + // worthwhile: 1.133 + // - mIDNService 1.134 + 1.135 + return n; 1.136 +} 1.137 + 1.138 +// External function for dealing with URI's correctly. 1.139 +// Pulls out the host portion from an nsIURI, and calls through to 1.140 +// GetPublicSuffixFromHost(). 1.141 +NS_IMETHODIMP 1.142 +nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI, 1.143 + nsACString &aPublicSuffix) 1.144 +{ 1.145 + NS_ENSURE_ARG_POINTER(aURI); 1.146 + 1.147 + nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); 1.148 + NS_ENSURE_ARG_POINTER(innerURI); 1.149 + 1.150 + nsAutoCString host; 1.151 + nsresult rv = innerURI->GetAsciiHost(host); 1.152 + if (NS_FAILED(rv)) return rv; 1.153 + 1.154 + return GetBaseDomainInternal(host, 0, aPublicSuffix); 1.155 +} 1.156 + 1.157 +// External function for dealing with URI's correctly. 1.158 +// Pulls out the host portion from an nsIURI, and calls through to 1.159 +// GetBaseDomainFromHost(). 
1.160 +NS_IMETHODIMP 1.161 +nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI, 1.162 + uint32_t aAdditionalParts, 1.163 + nsACString &aBaseDomain) 1.164 +{ 1.165 + NS_ENSURE_ARG_POINTER(aURI); 1.166 + NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); 1.167 + 1.168 + nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); 1.169 + NS_ENSURE_ARG_POINTER(innerURI); 1.170 + 1.171 + nsAutoCString host; 1.172 + nsresult rv = innerURI->GetAsciiHost(host); 1.173 + if (NS_FAILED(rv)) return rv; 1.174 + 1.175 + return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain); 1.176 +} 1.177 + 1.178 +// External function for dealing with a host string directly: finds the public 1.179 +// suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal(). 1.180 +NS_IMETHODIMP 1.181 +nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname, 1.182 + nsACString &aPublicSuffix) 1.183 +{ 1.184 + // Create a mutable copy of the hostname and normalize it to ACE. 1.185 + // This will fail if the hostname includes invalid characters. 1.186 + nsAutoCString normHostname(aHostname); 1.187 + nsresult rv = NormalizeHostname(normHostname); 1.188 + if (NS_FAILED(rv)) return rv; 1.189 + 1.190 + return GetBaseDomainInternal(normHostname, 0, aPublicSuffix); 1.191 +} 1.192 + 1.193 +// External function for dealing with a host string directly: finds the base 1.194 +// domain (e.g. www.co.uk) for the given hostname and number of subdomain parts 1.195 +// requested. See GetBaseDomainInternal(). 1.196 +NS_IMETHODIMP 1.197 +nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname, 1.198 + uint32_t aAdditionalParts, 1.199 + nsACString &aBaseDomain) 1.200 +{ 1.201 + NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); 1.202 + 1.203 + // Create a mutable copy of the hostname and normalize it to ACE. 1.204 + // This will fail if the hostname includes invalid characters. 
1.205 + nsAutoCString normHostname(aHostname); 1.206 + nsresult rv = NormalizeHostname(normHostname); 1.207 + if (NS_FAILED(rv)) return rv; 1.208 + 1.209 + return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain); 1.210 +} 1.211 + 1.212 +NS_IMETHODIMP 1.213 +nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname, 1.214 + nsACString& aBaseDomain) 1.215 +{ 1.216 + // Create a mutable copy of the hostname and normalize it to ACE. 1.217 + // This will fail if the hostname includes invalid characters. 1.218 + nsAutoCString normHostname(aHostname); 1.219 + nsresult rv = NormalizeHostname(normHostname); 1.220 + NS_ENSURE_SUCCESS(rv, rv); 1.221 + 1.222 + return GetBaseDomainInternal(normHostname, -1, aBaseDomain); 1.223 +} 1.224 + 1.225 +// Finds the base domain for a host, with requested number of additional parts. 1.226 +// This will fail, generating an error, if the host is an IPv4/IPv6 address, 1.227 +// if more subdomain parts are requested than are available, or if the hostname 1.228 +// includes characters that are not valid in a URL. Normalization is performed 1.229 +// on the host string and the result will be in UTF8. 1.230 +nsresult 1.231 +nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, 1.232 + int32_t aAdditionalParts, 1.233 + nsACString &aBaseDomain) 1.234 +{ 1.235 + if (aHostname.IsEmpty()) 1.236 + return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; 1.237 + 1.238 + // chomp any trailing dot, and keep track of it for later 1.239 + bool trailingDot = aHostname.Last() == '.'; 1.240 + if (trailingDot) 1.241 + aHostname.Truncate(aHostname.Length() - 1); 1.242 + 1.243 + // check the edge cases of the host being '.' or having a second trailing '.', 1.244 + // since subsequent checks won't catch it. 
1.245 + if (aHostname.IsEmpty() || aHostname.Last() == '.') 1.246 + return NS_ERROR_INVALID_ARG; 1.247 + 1.248 + // Check if we're dealing with an IPv4/IPv6 hostname, and return 1.249 + PRNetAddr addr; 1.250 + PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr); 1.251 + if (result == PR_SUCCESS) 1.252 + return NS_ERROR_HOST_IS_IP_ADDRESS; 1.253 + 1.254 + // Walk up the domain tree, most specific to least specific, 1.255 + // looking for matches at each level. Note that a given level may 1.256 + // have multiple attributes (e.g. IsWild() and IsNormal()). 1.257 + const char *prevDomain = nullptr; 1.258 + const char *currDomain = aHostname.get(); 1.259 + const char *nextDot = strchr(currDomain, '.'); 1.260 + const char *end = currDomain + aHostname.Length(); 1.261 + const char *eTLD = currDomain; 1.262 + while (1) { 1.263 + // sanity check the string we're about to look up: it should not begin with 1.264 + // a '.'; this would mean the hostname began with a '.' or had an 1.265 + // embedded '..' sequence. 1.266 + if (*currDomain == '.') 1.267 + return NS_ERROR_INVALID_ARG; 1.268 + 1.269 + // perform the hash lookup. 1.270 + nsDomainEntry *entry = mHash.GetEntry(currDomain); 1.271 + if (entry) { 1.272 + if (entry->IsWild() && prevDomain) { 1.273 + // wildcard rules imply an eTLD one level inferior to the match. 1.274 + eTLD = prevDomain; 1.275 + break; 1.276 + 1.277 + } else if (entry->IsNormal() || !nextDot) { 1.278 + // specific match, or we've hit the top domain level 1.279 + eTLD = currDomain; 1.280 + break; 1.281 + 1.282 + } else if (entry->IsException()) { 1.283 + // exception rules imply an eTLD one level superior to the match. 1.284 + eTLD = nextDot + 1; 1.285 + break; 1.286 + } 1.287 + } 1.288 + 1.289 + if (!nextDot) { 1.290 + // we've hit the top domain level; use it by default. 
1.291 + eTLD = currDomain; 1.292 + break; 1.293 + } 1.294 + 1.295 + prevDomain = currDomain; 1.296 + currDomain = nextDot + 1; 1.297 + nextDot = strchr(currDomain, '.'); 1.298 + } 1.299 + 1.300 + const char *begin, *iter; 1.301 + if (aAdditionalParts < 0) { 1.302 + NS_ASSERTION(aAdditionalParts == -1, 1.303 + "aAdditionalParts can't be negative and different from -1"); 1.304 + 1.305 + for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++); 1.306 + 1.307 + if (iter != eTLD) { 1.308 + iter++; 1.309 + } 1.310 + if (iter != eTLD) { 1.311 + aAdditionalParts = 0; 1.312 + } 1.313 + } else { 1.314 + // count off the number of requested domains. 1.315 + begin = aHostname.get(); 1.316 + iter = eTLD; 1.317 + 1.318 + while (1) { 1.319 + if (iter == begin) 1.320 + break; 1.321 + 1.322 + if (*(--iter) == '.' && aAdditionalParts-- == 0) { 1.323 + ++iter; 1.324 + ++aAdditionalParts; 1.325 + break; 1.326 + } 1.327 + } 1.328 + } 1.329 + 1.330 + if (aAdditionalParts != 0) 1.331 + return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; 1.332 + 1.333 + aBaseDomain = Substring(iter, end); 1.334 + // add on the trailing dot, if applicable 1.335 + if (trailingDot) 1.336 + aBaseDomain.Append('.'); 1.337 + 1.338 + return NS_OK; 1.339 +} 1.340 + 1.341 +// Normalizes the given hostname, component by component. ASCII/ACE 1.342 +// components are lower-cased, and UTF-8 components are normalized per 1.343 +// RFC 3454 and converted to ACE. 1.344 +nsresult 1.345 +nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname) 1.346 +{ 1.347 + if (!IsASCII(aHostname)) { 1.348 + nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname); 1.349 + if (NS_FAILED(rv)) 1.350 + return rv; 1.351 + } 1.352 + 1.353 + ToLowerCase(aHostname); 1.354 + return NS_OK; 1.355 +}