/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// This service reads a file of rules describing TLD-like domain names. For a
// complete description of the expected file format and parsing rules, see
// http://wiki.mozilla.org/Gecko:Effective_TLD_Service

#include "mozilla/ArrayUtils.h"
#include "mozilla/MemoryReporting.h"

#include "nsEffectiveTLDService.h"
#include "nsIIDNService.h"
#include "nsNetUtil.h"
#include "prnetdb.h"

using namespace mozilla;

NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
                  nsIMemoryReporter)

// ----------------------------------------------------------------------

// ETLD_STR_NUM(line) pastes "str" and the expanded value of |line| into one
// identifier. The extra ETLD_STR_NUM_1 level exists so that __LINE__ is
// macro-expanded to its number before the ## paste happens.
#define ETLD_STR_NUM_1(line) str##line
#define ETLD_STR_NUM(line) ETLD_STR_NUM_1(line)
// Byte offset, within struct etld_string_list, of the member named after the
// line on which the macro is expanded. The |name| argument is not used here.
// NOTE(review): etld_string_list is declared outside this file; presumably it
// has one strN member per ETLD_ENTRY line of etld_data.inc -- confirm in the
// header.
#define ETLD_ENTRY_OFFSET(name) offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))

// Rule table: every ETLD_ENTRY(name, ex, wild) in etld_data.inc becomes one
// { string-table offset, ex, wild } initializer.
const ETLDEntry nsDomainEntry::entries[] = {
#define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
#include "etld_data.inc"
#undef ETLD_ENTRY
};

// String table: the same include, this time expanding each entry to just its
// |name|, so the names are emitted in the same order as the rule table above.
const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
  {
#define ETLD_ENTRY(name, ex, wild) name,
#include "etld_data.inc"
#undef ETLD_ENTRY
  }
};

// Dummy function to statically ensure that our indices don't overflow
// the storage provided for them.
michael@0: void michael@0: nsDomainEntry::FuncForStaticAsserts(void) michael@0: { michael@0: #define ETLD_ENTRY(name, ex, wild) \ michael@0: static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \ michael@0: "invalid strtab index"); michael@0: #include "etld_data.inc" michael@0: #undef ETLD_ENTRY michael@0: } michael@0: michael@0: #undef ETLD_ENTRY_OFFSET michael@0: #undef ETLD_STR_NUM michael@0: #undef ETLD_STR_NUM1 michael@0: michael@0: // ---------------------------------------------------------------------- michael@0: michael@0: static nsEffectiveTLDService *gService = nullptr; michael@0: michael@0: nsEffectiveTLDService::nsEffectiveTLDService() michael@0: // We'll probably have to rehash at least once, since nsTHashtable doesn't michael@0: // use a perfect hash, but at least we'll save a few rehashes along the way. michael@0: // Next optimization here is to precompute the hash using something like michael@0: // gperf, but one step at a time. :-) michael@0: : mHash(ArrayLength(nsDomainEntry::entries)) michael@0: { michael@0: } michael@0: michael@0: nsresult michael@0: nsEffectiveTLDService::Init() michael@0: { michael@0: const ETLDEntry *entries = nsDomainEntry::entries; michael@0: michael@0: nsresult rv; michael@0: mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: // Initialize eTLD hash from static array michael@0: for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) { michael@0: const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index); michael@0: #ifdef DEBUG michael@0: nsDependentCString name(domain); michael@0: nsAutoCString normalizedName(domain); michael@0: NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)), michael@0: "normalization failure!"); michael@0: NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!"); michael@0: #endif michael@0: nsDomainEntry *entry = mHash.PutEntry(domain); michael@0: 
NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY); michael@0: entry->SetData(&entries[i]); michael@0: } michael@0: michael@0: MOZ_ASSERT(!gService); michael@0: gService = this; michael@0: RegisterWeakMemoryReporter(this); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: nsEffectiveTLDService::~nsEffectiveTLDService() michael@0: { michael@0: UnregisterWeakMemoryReporter(this); michael@0: gService = nullptr; michael@0: } michael@0: michael@0: MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf) michael@0: michael@0: NS_IMETHODIMP michael@0: nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport, michael@0: nsISupports* aData) michael@0: { michael@0: return MOZ_COLLECT_REPORT( michael@0: "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES, michael@0: SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf), michael@0: "Memory used by the effective TLD service."); michael@0: } michael@0: michael@0: size_t michael@0: nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) michael@0: { michael@0: size_t n = aMallocSizeOf(this); michael@0: n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf); michael@0: michael@0: // Measurement of the following members may be added later if DMD finds it is michael@0: // worthwhile: michael@0: // - mIDNService michael@0: michael@0: return n; michael@0: } michael@0: michael@0: // External function for dealing with URI's correctly. michael@0: // Pulls out the host portion from an nsIURI, and calls through to michael@0: // GetPublicSuffixFromHost(). 
michael@0: NS_IMETHODIMP michael@0: nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI, michael@0: nsACString &aPublicSuffix) michael@0: { michael@0: NS_ENSURE_ARG_POINTER(aURI); michael@0: michael@0: nsCOMPtr innerURI = NS_GetInnermostURI(aURI); michael@0: NS_ENSURE_ARG_POINTER(innerURI); michael@0: michael@0: nsAutoCString host; michael@0: nsresult rv = innerURI->GetAsciiHost(host); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: return GetBaseDomainInternal(host, 0, aPublicSuffix); michael@0: } michael@0: michael@0: // External function for dealing with URI's correctly. michael@0: // Pulls out the host portion from an nsIURI, and calls through to michael@0: // GetBaseDomainFromHost(). michael@0: NS_IMETHODIMP michael@0: nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI, michael@0: uint32_t aAdditionalParts, michael@0: nsACString &aBaseDomain) michael@0: { michael@0: NS_ENSURE_ARG_POINTER(aURI); michael@0: NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); michael@0: michael@0: nsCOMPtr innerURI = NS_GetInnermostURI(aURI); michael@0: NS_ENSURE_ARG_POINTER(innerURI); michael@0: michael@0: nsAutoCString host; michael@0: nsresult rv = innerURI->GetAsciiHost(host); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain); michael@0: } michael@0: michael@0: // External function for dealing with a host string directly: finds the public michael@0: // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal(). michael@0: NS_IMETHODIMP michael@0: nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname, michael@0: nsACString &aPublicSuffix) michael@0: { michael@0: // Create a mutable copy of the hostname and normalize it to ACE. michael@0: // This will fail if the hostname includes invalid characters. 
michael@0: nsAutoCString normHostname(aHostname); michael@0: nsresult rv = NormalizeHostname(normHostname); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: return GetBaseDomainInternal(normHostname, 0, aPublicSuffix); michael@0: } michael@0: michael@0: // External function for dealing with a host string directly: finds the base michael@0: // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts michael@0: // requested. See GetBaseDomainInternal(). michael@0: NS_IMETHODIMP michael@0: nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname, michael@0: uint32_t aAdditionalParts, michael@0: nsACString &aBaseDomain) michael@0: { michael@0: NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); michael@0: michael@0: // Create a mutable copy of the hostname and normalize it to ACE. michael@0: // This will fail if the hostname includes invalid characters. michael@0: nsAutoCString normHostname(aHostname); michael@0: nsresult rv = NormalizeHostname(normHostname); michael@0: if (NS_FAILED(rv)) return rv; michael@0: michael@0: return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain); michael@0: } michael@0: michael@0: NS_IMETHODIMP michael@0: nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname, michael@0: nsACString& aBaseDomain) michael@0: { michael@0: // Create a mutable copy of the hostname and normalize it to ACE. michael@0: // This will fail if the hostname includes invalid characters. michael@0: nsAutoCString normHostname(aHostname); michael@0: nsresult rv = NormalizeHostname(normHostname); michael@0: NS_ENSURE_SUCCESS(rv, rv); michael@0: michael@0: return GetBaseDomainInternal(normHostname, -1, aBaseDomain); michael@0: } michael@0: michael@0: // Finds the base domain for a host, with requested number of additional parts. 
michael@0: // This will fail, generating an error, if the host is an IPv4/IPv6 address, michael@0: // if more subdomain parts are requested than are available, or if the hostname michael@0: // includes characters that are not valid in a URL. Normalization is performed michael@0: // on the host string and the result will be in UTF8. michael@0: nsresult michael@0: nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, michael@0: int32_t aAdditionalParts, michael@0: nsACString &aBaseDomain) michael@0: { michael@0: if (aHostname.IsEmpty()) michael@0: return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; michael@0: michael@0: // chomp any trailing dot, and keep track of it for later michael@0: bool trailingDot = aHostname.Last() == '.'; michael@0: if (trailingDot) michael@0: aHostname.Truncate(aHostname.Length() - 1); michael@0: michael@0: // check the edge cases of the host being '.' or having a second trailing '.', michael@0: // since subsequent checks won't catch it. michael@0: if (aHostname.IsEmpty() || aHostname.Last() == '.') michael@0: return NS_ERROR_INVALID_ARG; michael@0: michael@0: // Check if we're dealing with an IPv4/IPv6 hostname, and return michael@0: PRNetAddr addr; michael@0: PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr); michael@0: if (result == PR_SUCCESS) michael@0: return NS_ERROR_HOST_IS_IP_ADDRESS; michael@0: michael@0: // Walk up the domain tree, most specific to least specific, michael@0: // looking for matches at each level. Note that a given level may michael@0: // have multiple attributes (e.g. IsWild() and IsNormal()). 
michael@0: const char *prevDomain = nullptr; michael@0: const char *currDomain = aHostname.get(); michael@0: const char *nextDot = strchr(currDomain, '.'); michael@0: const char *end = currDomain + aHostname.Length(); michael@0: const char *eTLD = currDomain; michael@0: while (1) { michael@0: // sanity check the string we're about to look up: it should not begin with michael@0: // a '.'; this would mean the hostname began with a '.' or had an michael@0: // embedded '..' sequence. michael@0: if (*currDomain == '.') michael@0: return NS_ERROR_INVALID_ARG; michael@0: michael@0: // perform the hash lookup. michael@0: nsDomainEntry *entry = mHash.GetEntry(currDomain); michael@0: if (entry) { michael@0: if (entry->IsWild() && prevDomain) { michael@0: // wildcard rules imply an eTLD one level inferior to the match. michael@0: eTLD = prevDomain; michael@0: break; michael@0: michael@0: } else if (entry->IsNormal() || !nextDot) { michael@0: // specific match, or we've hit the top domain level michael@0: eTLD = currDomain; michael@0: break; michael@0: michael@0: } else if (entry->IsException()) { michael@0: // exception rules imply an eTLD one level superior to the match. michael@0: eTLD = nextDot + 1; michael@0: break; michael@0: } michael@0: } michael@0: michael@0: if (!nextDot) { michael@0: // we've hit the top domain level; use it by default. 
michael@0: eTLD = currDomain; michael@0: break; michael@0: } michael@0: michael@0: prevDomain = currDomain; michael@0: currDomain = nextDot + 1; michael@0: nextDot = strchr(currDomain, '.'); michael@0: } michael@0: michael@0: const char *begin, *iter; michael@0: if (aAdditionalParts < 0) { michael@0: NS_ASSERTION(aAdditionalParts == -1, michael@0: "aAdditionalParts can't be negative and different from -1"); michael@0: michael@0: for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++); michael@0: michael@0: if (iter != eTLD) { michael@0: iter++; michael@0: } michael@0: if (iter != eTLD) { michael@0: aAdditionalParts = 0; michael@0: } michael@0: } else { michael@0: // count off the number of requested domains. michael@0: begin = aHostname.get(); michael@0: iter = eTLD; michael@0: michael@0: while (1) { michael@0: if (iter == begin) michael@0: break; michael@0: michael@0: if (*(--iter) == '.' && aAdditionalParts-- == 0) { michael@0: ++iter; michael@0: ++aAdditionalParts; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: if (aAdditionalParts != 0) michael@0: return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; michael@0: michael@0: aBaseDomain = Substring(iter, end); michael@0: // add on the trailing dot, if applicable michael@0: if (trailingDot) michael@0: aBaseDomain.Append('.'); michael@0: michael@0: return NS_OK; michael@0: } michael@0: michael@0: // Normalizes the given hostname, component by component. ASCII/ACE michael@0: // components are lower-cased, and UTF-8 components are normalized per michael@0: // RFC 3454 and converted to ACE. michael@0: nsresult michael@0: nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname) michael@0: { michael@0: if (!IsASCII(aHostname)) { michael@0: nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname); michael@0: if (NS_FAILED(rv)) michael@0: return rv; michael@0: } michael@0: michael@0: ToLowerCase(aHostname); michael@0: return NS_OK; michael@0: }