Wed, 31 Dec 2014 06:55:46 +0100
Added tag TORBROWSER_REPLICA for changeset 6474c204b198
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
michael@0 | 2 | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
michael@0 | 3 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | // This service reads a file of rules describing TLD-like domain names. For a |
michael@0 | 8 | // complete description of the expected file format and parsing rules, see |
michael@0 | 9 | // http://wiki.mozilla.org/Gecko:Effective_TLD_Service |
michael@0 | 10 | |
michael@0 | 11 | #include "mozilla/ArrayUtils.h" |
michael@0 | 12 | #include "mozilla/MemoryReporting.h" |
michael@0 | 13 | |
michael@0 | 14 | #include "nsEffectiveTLDService.h" |
michael@0 | 15 | #include "nsIIDNService.h" |
michael@0 | 16 | #include "nsNetUtil.h" |
michael@0 | 17 | #include "prnetdb.h" |
michael@0 | 18 | |
michael@0 | 19 | using namespace mozilla; |
michael@0 | 20 | |
michael@0 | 21 | NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, |
michael@0 | 22 | nsIMemoryReporter) |
michael@0 | 23 | |
michael@0 | 24 | // ---------------------------------------------------------------------- |
michael@0 | 25 | |
// Helpers used while expanding etld_data.inc. The two-step expansion lets
// __LINE__ be replaced by its numeric value before it is pasted onto "str".
#define ETLD_STR_NUM_1(num) str##num
#define ETLD_STR_NUM(num) ETLD_STR_NUM_1(num)
// Offset, inside struct etld_string_list, of the member named after the line
// being expanded. The macro argument itself is not used.
#define ETLD_ENTRY_OFFSET(unused) \
  offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
michael@0 | 29 | |
michael@0 | 30 | const ETLDEntry nsDomainEntry::entries[] = { |
michael@0 | 31 | #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild }, |
michael@0 | 32 | #include "etld_data.inc" |
michael@0 | 33 | #undef ETLD_ENTRY |
michael@0 | 34 | }; |
michael@0 | 35 | |
michael@0 | 36 | const union nsDomainEntry::etld_strings nsDomainEntry::strings = { |
michael@0 | 37 | { |
michael@0 | 38 | #define ETLD_ENTRY(name, ex, wild) name, |
michael@0 | 39 | #include "etld_data.inc" |
michael@0 | 40 | #undef ETLD_ENTRY |
michael@0 | 41 | } |
michael@0 | 42 | }; |
michael@0 | 43 | |
michael@0 | 44 | // Dummy function to statically ensure that our indices don't overflow |
michael@0 | 45 | // the storage provided for them. |
michael@0 | 46 | void |
michael@0 | 47 | nsDomainEntry::FuncForStaticAsserts(void) |
michael@0 | 48 | { |
michael@0 | 49 | #define ETLD_ENTRY(name, ex, wild) \ |
michael@0 | 50 | static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \ |
michael@0 | 51 | "invalid strtab index"); |
michael@0 | 52 | #include "etld_data.inc" |
michael@0 | 53 | #undef ETLD_ENTRY |
michael@0 | 54 | } |
michael@0 | 55 | |
// Retire the table-building helpers now that every expansion of etld_data.inc
// is done, so none of them stays visible in the rest of this file. The names
// must match the #define lines exactly: undefining a name that was never
// defined is silently ignored.
#undef ETLD_ENTRY_OFFSET
#undef ETLD_STR_NUM
#undef ETLD_STR_NUM_1
michael@0 | 59 | |
michael@0 | 60 | // ---------------------------------------------------------------------- |
michael@0 | 61 | |
michael@0 | 62 | static nsEffectiveTLDService *gService = nullptr; |
michael@0 | 63 | |
michael@0 | 64 | nsEffectiveTLDService::nsEffectiveTLDService() |
michael@0 | 65 | // We'll probably have to rehash at least once, since nsTHashtable doesn't |
michael@0 | 66 | // use a perfect hash, but at least we'll save a few rehashes along the way. |
michael@0 | 67 | // Next optimization here is to precompute the hash using something like |
michael@0 | 68 | // gperf, but one step at a time. :-) |
michael@0 | 69 | : mHash(ArrayLength(nsDomainEntry::entries)) |
michael@0 | 70 | { |
michael@0 | 71 | } |
michael@0 | 72 | |
michael@0 | 73 | nsresult |
michael@0 | 74 | nsEffectiveTLDService::Init() |
michael@0 | 75 | { |
michael@0 | 76 | const ETLDEntry *entries = nsDomainEntry::entries; |
michael@0 | 77 | |
michael@0 | 78 | nsresult rv; |
michael@0 | 79 | mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); |
michael@0 | 80 | if (NS_FAILED(rv)) return rv; |
michael@0 | 81 | |
michael@0 | 82 | // Initialize eTLD hash from static array |
michael@0 | 83 | for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) { |
michael@0 | 84 | const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index); |
michael@0 | 85 | #ifdef DEBUG |
michael@0 | 86 | nsDependentCString name(domain); |
michael@0 | 87 | nsAutoCString normalizedName(domain); |
michael@0 | 88 | NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)), |
michael@0 | 89 | "normalization failure!"); |
michael@0 | 90 | NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!"); |
michael@0 | 91 | #endif |
michael@0 | 92 | nsDomainEntry *entry = mHash.PutEntry(domain); |
michael@0 | 93 | NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY); |
michael@0 | 94 | entry->SetData(&entries[i]); |
michael@0 | 95 | } |
michael@0 | 96 | |
michael@0 | 97 | MOZ_ASSERT(!gService); |
michael@0 | 98 | gService = this; |
michael@0 | 99 | RegisterWeakMemoryReporter(this); |
michael@0 | 100 | |
michael@0 | 101 | return NS_OK; |
michael@0 | 102 | } |
michael@0 | 103 | |
michael@0 | 104 | nsEffectiveTLDService::~nsEffectiveTLDService() |
michael@0 | 105 | { |
michael@0 | 106 | UnregisterWeakMemoryReporter(this); |
michael@0 | 107 | gService = nullptr; |
michael@0 | 108 | } |
michael@0 | 109 | |
michael@0 | 110 | MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf) |
michael@0 | 111 | |
michael@0 | 112 | NS_IMETHODIMP |
michael@0 | 113 | nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport, |
michael@0 | 114 | nsISupports* aData) |
michael@0 | 115 | { |
michael@0 | 116 | return MOZ_COLLECT_REPORT( |
michael@0 | 117 | "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES, |
michael@0 | 118 | SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf), |
michael@0 | 119 | "Memory used by the effective TLD service."); |
michael@0 | 120 | } |
michael@0 | 121 | |
michael@0 | 122 | size_t |
michael@0 | 123 | nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) |
michael@0 | 124 | { |
michael@0 | 125 | size_t n = aMallocSizeOf(this); |
michael@0 | 126 | n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf); |
michael@0 | 127 | |
michael@0 | 128 | // Measurement of the following members may be added later if DMD finds it is |
michael@0 | 129 | // worthwhile: |
michael@0 | 130 | // - mIDNService |
michael@0 | 131 | |
michael@0 | 132 | return n; |
michael@0 | 133 | } |
michael@0 | 134 | |
michael@0 | 135 | // External function for dealing with URI's correctly. |
michael@0 | 136 | // Pulls out the host portion from an nsIURI, and calls through to |
michael@0 | 137 | // GetPublicSuffixFromHost(). |
michael@0 | 138 | NS_IMETHODIMP |
michael@0 | 139 | nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI, |
michael@0 | 140 | nsACString &aPublicSuffix) |
michael@0 | 141 | { |
michael@0 | 142 | NS_ENSURE_ARG_POINTER(aURI); |
michael@0 | 143 | |
michael@0 | 144 | nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); |
michael@0 | 145 | NS_ENSURE_ARG_POINTER(innerURI); |
michael@0 | 146 | |
michael@0 | 147 | nsAutoCString host; |
michael@0 | 148 | nsresult rv = innerURI->GetAsciiHost(host); |
michael@0 | 149 | if (NS_FAILED(rv)) return rv; |
michael@0 | 150 | |
michael@0 | 151 | return GetBaseDomainInternal(host, 0, aPublicSuffix); |
michael@0 | 152 | } |
michael@0 | 153 | |
michael@0 | 154 | // External function for dealing with URI's correctly. |
michael@0 | 155 | // Pulls out the host portion from an nsIURI, and calls through to |
michael@0 | 156 | // GetBaseDomainFromHost(). |
michael@0 | 157 | NS_IMETHODIMP |
michael@0 | 158 | nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI, |
michael@0 | 159 | uint32_t aAdditionalParts, |
michael@0 | 160 | nsACString &aBaseDomain) |
michael@0 | 161 | { |
michael@0 | 162 | NS_ENSURE_ARG_POINTER(aURI); |
michael@0 | 163 | NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); |
michael@0 | 164 | |
michael@0 | 165 | nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); |
michael@0 | 166 | NS_ENSURE_ARG_POINTER(innerURI); |
michael@0 | 167 | |
michael@0 | 168 | nsAutoCString host; |
michael@0 | 169 | nsresult rv = innerURI->GetAsciiHost(host); |
michael@0 | 170 | if (NS_FAILED(rv)) return rv; |
michael@0 | 171 | |
michael@0 | 172 | return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain); |
michael@0 | 173 | } |
michael@0 | 174 | |
michael@0 | 175 | // External function for dealing with a host string directly: finds the public |
michael@0 | 176 | // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal(). |
michael@0 | 177 | NS_IMETHODIMP |
michael@0 | 178 | nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname, |
michael@0 | 179 | nsACString &aPublicSuffix) |
michael@0 | 180 | { |
michael@0 | 181 | // Create a mutable copy of the hostname and normalize it to ACE. |
michael@0 | 182 | // This will fail if the hostname includes invalid characters. |
michael@0 | 183 | nsAutoCString normHostname(aHostname); |
michael@0 | 184 | nsresult rv = NormalizeHostname(normHostname); |
michael@0 | 185 | if (NS_FAILED(rv)) return rv; |
michael@0 | 186 | |
michael@0 | 187 | return GetBaseDomainInternal(normHostname, 0, aPublicSuffix); |
michael@0 | 188 | } |
michael@0 | 189 | |
michael@0 | 190 | // External function for dealing with a host string directly: finds the base |
michael@0 | 191 | // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts |
michael@0 | 192 | // requested. See GetBaseDomainInternal(). |
michael@0 | 193 | NS_IMETHODIMP |
michael@0 | 194 | nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname, |
michael@0 | 195 | uint32_t aAdditionalParts, |
michael@0 | 196 | nsACString &aBaseDomain) |
michael@0 | 197 | { |
michael@0 | 198 | NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); |
michael@0 | 199 | |
michael@0 | 200 | // Create a mutable copy of the hostname and normalize it to ACE. |
michael@0 | 201 | // This will fail if the hostname includes invalid characters. |
michael@0 | 202 | nsAutoCString normHostname(aHostname); |
michael@0 | 203 | nsresult rv = NormalizeHostname(normHostname); |
michael@0 | 204 | if (NS_FAILED(rv)) return rv; |
michael@0 | 205 | |
michael@0 | 206 | return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain); |
michael@0 | 207 | } |
michael@0 | 208 | |
michael@0 | 209 | NS_IMETHODIMP |
michael@0 | 210 | nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname, |
michael@0 | 211 | nsACString& aBaseDomain) |
michael@0 | 212 | { |
michael@0 | 213 | // Create a mutable copy of the hostname and normalize it to ACE. |
michael@0 | 214 | // This will fail if the hostname includes invalid characters. |
michael@0 | 215 | nsAutoCString normHostname(aHostname); |
michael@0 | 216 | nsresult rv = NormalizeHostname(normHostname); |
michael@0 | 217 | NS_ENSURE_SUCCESS(rv, rv); |
michael@0 | 218 | |
michael@0 | 219 | return GetBaseDomainInternal(normHostname, -1, aBaseDomain); |
michael@0 | 220 | } |
michael@0 | 221 | |
michael@0 | 222 | // Finds the base domain for a host, with requested number of additional parts. |
michael@0 | 223 | // This will fail, generating an error, if the host is an IPv4/IPv6 address, |
michael@0 | 224 | // if more subdomain parts are requested than are available, or if the hostname |
michael@0 | 225 | // includes characters that are not valid in a URL. Normalization is performed |
michael@0 | 226 | // on the host string and the result will be in UTF8. |
michael@0 | 227 | nsresult |
michael@0 | 228 | nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, |
michael@0 | 229 | int32_t aAdditionalParts, |
michael@0 | 230 | nsACString &aBaseDomain) |
michael@0 | 231 | { |
michael@0 | 232 | if (aHostname.IsEmpty()) |
michael@0 | 233 | return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; |
michael@0 | 234 | |
michael@0 | 235 | // chomp any trailing dot, and keep track of it for later |
michael@0 | 236 | bool trailingDot = aHostname.Last() == '.'; |
michael@0 | 237 | if (trailingDot) |
michael@0 | 238 | aHostname.Truncate(aHostname.Length() - 1); |
michael@0 | 239 | |
michael@0 | 240 | // check the edge cases of the host being '.' or having a second trailing '.', |
michael@0 | 241 | // since subsequent checks won't catch it. |
michael@0 | 242 | if (aHostname.IsEmpty() || aHostname.Last() == '.') |
michael@0 | 243 | return NS_ERROR_INVALID_ARG; |
michael@0 | 244 | |
michael@0 | 245 | // Check if we're dealing with an IPv4/IPv6 hostname, and return |
michael@0 | 246 | PRNetAddr addr; |
michael@0 | 247 | PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr); |
michael@0 | 248 | if (result == PR_SUCCESS) |
michael@0 | 249 | return NS_ERROR_HOST_IS_IP_ADDRESS; |
michael@0 | 250 | |
michael@0 | 251 | // Walk up the domain tree, most specific to least specific, |
michael@0 | 252 | // looking for matches at each level. Note that a given level may |
michael@0 | 253 | // have multiple attributes (e.g. IsWild() and IsNormal()). |
michael@0 | 254 | const char *prevDomain = nullptr; |
michael@0 | 255 | const char *currDomain = aHostname.get(); |
michael@0 | 256 | const char *nextDot = strchr(currDomain, '.'); |
michael@0 | 257 | const char *end = currDomain + aHostname.Length(); |
michael@0 | 258 | const char *eTLD = currDomain; |
michael@0 | 259 | while (1) { |
michael@0 | 260 | // sanity check the string we're about to look up: it should not begin with |
michael@0 | 261 | // a '.'; this would mean the hostname began with a '.' or had an |
michael@0 | 262 | // embedded '..' sequence. |
michael@0 | 263 | if (*currDomain == '.') |
michael@0 | 264 | return NS_ERROR_INVALID_ARG; |
michael@0 | 265 | |
michael@0 | 266 | // perform the hash lookup. |
michael@0 | 267 | nsDomainEntry *entry = mHash.GetEntry(currDomain); |
michael@0 | 268 | if (entry) { |
michael@0 | 269 | if (entry->IsWild() && prevDomain) { |
michael@0 | 270 | // wildcard rules imply an eTLD one level inferior to the match. |
michael@0 | 271 | eTLD = prevDomain; |
michael@0 | 272 | break; |
michael@0 | 273 | |
michael@0 | 274 | } else if (entry->IsNormal() || !nextDot) { |
michael@0 | 275 | // specific match, or we've hit the top domain level |
michael@0 | 276 | eTLD = currDomain; |
michael@0 | 277 | break; |
michael@0 | 278 | |
michael@0 | 279 | } else if (entry->IsException()) { |
michael@0 | 280 | // exception rules imply an eTLD one level superior to the match. |
michael@0 | 281 | eTLD = nextDot + 1; |
michael@0 | 282 | break; |
michael@0 | 283 | } |
michael@0 | 284 | } |
michael@0 | 285 | |
michael@0 | 286 | if (!nextDot) { |
michael@0 | 287 | // we've hit the top domain level; use it by default. |
michael@0 | 288 | eTLD = currDomain; |
michael@0 | 289 | break; |
michael@0 | 290 | } |
michael@0 | 291 | |
michael@0 | 292 | prevDomain = currDomain; |
michael@0 | 293 | currDomain = nextDot + 1; |
michael@0 | 294 | nextDot = strchr(currDomain, '.'); |
michael@0 | 295 | } |
michael@0 | 296 | |
michael@0 | 297 | const char *begin, *iter; |
michael@0 | 298 | if (aAdditionalParts < 0) { |
michael@0 | 299 | NS_ASSERTION(aAdditionalParts == -1, |
michael@0 | 300 | "aAdditionalParts can't be negative and different from -1"); |
michael@0 | 301 | |
michael@0 | 302 | for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++); |
michael@0 | 303 | |
michael@0 | 304 | if (iter != eTLD) { |
michael@0 | 305 | iter++; |
michael@0 | 306 | } |
michael@0 | 307 | if (iter != eTLD) { |
michael@0 | 308 | aAdditionalParts = 0; |
michael@0 | 309 | } |
michael@0 | 310 | } else { |
michael@0 | 311 | // count off the number of requested domains. |
michael@0 | 312 | begin = aHostname.get(); |
michael@0 | 313 | iter = eTLD; |
michael@0 | 314 | |
michael@0 | 315 | while (1) { |
michael@0 | 316 | if (iter == begin) |
michael@0 | 317 | break; |
michael@0 | 318 | |
michael@0 | 319 | if (*(--iter) == '.' && aAdditionalParts-- == 0) { |
michael@0 | 320 | ++iter; |
michael@0 | 321 | ++aAdditionalParts; |
michael@0 | 322 | break; |
michael@0 | 323 | } |
michael@0 | 324 | } |
michael@0 | 325 | } |
michael@0 | 326 | |
michael@0 | 327 | if (aAdditionalParts != 0) |
michael@0 | 328 | return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; |
michael@0 | 329 | |
michael@0 | 330 | aBaseDomain = Substring(iter, end); |
michael@0 | 331 | // add on the trailing dot, if applicable |
michael@0 | 332 | if (trailingDot) |
michael@0 | 333 | aBaseDomain.Append('.'); |
michael@0 | 334 | |
michael@0 | 335 | return NS_OK; |
michael@0 | 336 | } |
michael@0 | 337 | |
michael@0 | 338 | // Normalizes the given hostname, component by component. ASCII/ACE |
michael@0 | 339 | // components are lower-cased, and UTF-8 components are normalized per |
michael@0 | 340 | // RFC 3454 and converted to ACE. |
michael@0 | 341 | nsresult |
michael@0 | 342 | nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname) |
michael@0 | 343 | { |
michael@0 | 344 | if (!IsASCII(aHostname)) { |
michael@0 | 345 | nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname); |
michael@0 | 346 | if (NS_FAILED(rv)) |
michael@0 | 347 | return rv; |
michael@0 | 348 | } |
michael@0 | 349 | |
michael@0 | 350 | ToLowerCase(aHostname); |
michael@0 | 351 | return NS_OK; |
michael@0 | 352 | } |