Wed, 31 Dec 2014 06:55:46 +0100
Added tag TORBROWSER_REPLICA for changeset 6474c204b198
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // This service reads a file of rules describing TLD-like domain names. For a
8 // complete description of the expected file format and parsing rules, see
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service
11 #include "mozilla/ArrayUtils.h"
12 #include "mozilla/MemoryReporting.h"
14 #include "nsEffectiveTLDService.h"
15 #include "nsIIDNService.h"
16 #include "nsNetUtil.h"
17 #include "prnetdb.h"
19 using namespace mozilla;
21 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
22 nsIMemoryReporter)
24 // ----------------------------------------------------------------------
// Token-pasting helpers: ETLD_STR_NUM(n) yields the identifier str<n>. The
// extra level of indirection makes sure an argument such as __LINE__ is
// replaced by its value before it is pasted.
#define ETLD_STR_NUM_1(aLineNo) str##aLineNo
#define ETLD_STR_NUM(aLineNo) ETLD_STR_NUM_1(aLineNo)

// Byte offset, inside struct etld_string_list, of the member named after the
// source line currently being expanded. The macro argument is intentionally
// unused: the member is selected through __LINE__, not through the name.
#define ETLD_ENTRY_OFFSET(aName) \
  offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
30 const ETLDEntry nsDomainEntry::entries[] = {
31 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild },
32 #include "etld_data.inc"
33 #undef ETLD_ENTRY
34 };
36 const union nsDomainEntry::etld_strings nsDomainEntry::strings = {
37 {
38 #define ETLD_ENTRY(name, ex, wild) name,
39 #include "etld_data.inc"
40 #undef ETLD_ENTRY
41 }
42 };
44 // Dummy function to statically ensure that our indices don't overflow
45 // the storage provided for them.
46 void
47 nsDomainEntry::FuncForStaticAsserts(void)
48 {
49 #define ETLD_ENTRY(name, ex, wild) \
50 static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \
51 "invalid strtab index");
52 #include "etld_data.inc"
53 #undef ETLD_ENTRY
54 }
// The table-building helper macros are not needed past this point. Note that
// the inner pasting helper is spelled ETLD_STR_NUM_1 (with an underscore
// before the digit); undefining any other spelling would silently leave it
// defined for the rest of the translation unit.
#undef ETLD_ENTRY_OFFSET
#undef ETLD_STR_NUM
#undef ETLD_STR_NUM_1
60 // ----------------------------------------------------------------------
62 static nsEffectiveTLDService *gService = nullptr;
64 nsEffectiveTLDService::nsEffectiveTLDService()
65 // We'll probably have to rehash at least once, since nsTHashtable doesn't
66 // use a perfect hash, but at least we'll save a few rehashes along the way.
67 // Next optimization here is to precompute the hash using something like
68 // gperf, but one step at a time. :-)
69 : mHash(ArrayLength(nsDomainEntry::entries))
70 {
71 }
73 nsresult
74 nsEffectiveTLDService::Init()
75 {
76 const ETLDEntry *entries = nsDomainEntry::entries;
78 nsresult rv;
79 mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
80 if (NS_FAILED(rv)) return rv;
82 // Initialize eTLD hash from static array
83 for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) {
84 const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index);
85 #ifdef DEBUG
86 nsDependentCString name(domain);
87 nsAutoCString normalizedName(domain);
88 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)),
89 "normalization failure!");
90 NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!");
91 #endif
92 nsDomainEntry *entry = mHash.PutEntry(domain);
93 NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY);
94 entry->SetData(&entries[i]);
95 }
97 MOZ_ASSERT(!gService);
98 gService = this;
99 RegisterWeakMemoryReporter(this);
101 return NS_OK;
102 }
104 nsEffectiveTLDService::~nsEffectiveTLDService()
105 {
106 UnregisterWeakMemoryReporter(this);
107 gService = nullptr;
108 }
110 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
112 NS_IMETHODIMP
113 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
114 nsISupports* aData)
115 {
116 return MOZ_COLLECT_REPORT(
117 "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES,
118 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
119 "Memory used by the effective TLD service.");
120 }
122 size_t
123 nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
124 {
125 size_t n = aMallocSizeOf(this);
126 n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf);
128 // Measurement of the following members may be added later if DMD finds it is
129 // worthwhile:
130 // - mIDNService
132 return n;
133 }
135 // External function for dealing with URI's correctly.
136 // Pulls out the host portion from an nsIURI, and calls through to
137 // GetPublicSuffixFromHost().
138 NS_IMETHODIMP
139 nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI,
140 nsACString &aPublicSuffix)
141 {
142 NS_ENSURE_ARG_POINTER(aURI);
144 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
145 NS_ENSURE_ARG_POINTER(innerURI);
147 nsAutoCString host;
148 nsresult rv = innerURI->GetAsciiHost(host);
149 if (NS_FAILED(rv)) return rv;
151 return GetBaseDomainInternal(host, 0, aPublicSuffix);
152 }
154 // External function for dealing with URI's correctly.
155 // Pulls out the host portion from an nsIURI, and calls through to
156 // GetBaseDomainFromHost().
157 NS_IMETHODIMP
158 nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI,
159 uint32_t aAdditionalParts,
160 nsACString &aBaseDomain)
161 {
162 NS_ENSURE_ARG_POINTER(aURI);
163 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
165 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI);
166 NS_ENSURE_ARG_POINTER(innerURI);
168 nsAutoCString host;
169 nsresult rv = innerURI->GetAsciiHost(host);
170 if (NS_FAILED(rv)) return rv;
172 return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain);
173 }
175 // External function for dealing with a host string directly: finds the public
176 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
177 NS_IMETHODIMP
178 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname,
179 nsACString &aPublicSuffix)
180 {
181 // Create a mutable copy of the hostname and normalize it to ACE.
182 // This will fail if the hostname includes invalid characters.
183 nsAutoCString normHostname(aHostname);
184 nsresult rv = NormalizeHostname(normHostname);
185 if (NS_FAILED(rv)) return rv;
187 return GetBaseDomainInternal(normHostname, 0, aPublicSuffix);
188 }
190 // External function for dealing with a host string directly: finds the base
191 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
192 // requested. See GetBaseDomainInternal().
193 NS_IMETHODIMP
194 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname,
195 uint32_t aAdditionalParts,
196 nsACString &aBaseDomain)
197 {
198 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
200 // Create a mutable copy of the hostname and normalize it to ACE.
201 // This will fail if the hostname includes invalid characters.
202 nsAutoCString normHostname(aHostname);
203 nsresult rv = NormalizeHostname(normHostname);
204 if (NS_FAILED(rv)) return rv;
206 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain);
207 }
209 NS_IMETHODIMP
210 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
211 nsACString& aBaseDomain)
212 {
213 // Create a mutable copy of the hostname and normalize it to ACE.
214 // This will fail if the hostname includes invalid characters.
215 nsAutoCString normHostname(aHostname);
216 nsresult rv = NormalizeHostname(normHostname);
217 NS_ENSURE_SUCCESS(rv, rv);
219 return GetBaseDomainInternal(normHostname, -1, aBaseDomain);
220 }
222 // Finds the base domain for a host, with requested number of additional parts.
223 // This will fail, generating an error, if the host is an IPv4/IPv6 address,
224 // if more subdomain parts are requested than are available, or if the hostname
225 // includes characters that are not valid in a URL. Normalization is performed
226 // on the host string and the result will be in UTF8.
227 nsresult
228 nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname,
229 int32_t aAdditionalParts,
230 nsACString &aBaseDomain)
231 {
232 if (aHostname.IsEmpty())
233 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
235 // chomp any trailing dot, and keep track of it for later
236 bool trailingDot = aHostname.Last() == '.';
237 if (trailingDot)
238 aHostname.Truncate(aHostname.Length() - 1);
240 // check the edge cases of the host being '.' or having a second trailing '.',
241 // since subsequent checks won't catch it.
242 if (aHostname.IsEmpty() || aHostname.Last() == '.')
243 return NS_ERROR_INVALID_ARG;
245 // Check if we're dealing with an IPv4/IPv6 hostname, and return
246 PRNetAddr addr;
247 PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr);
248 if (result == PR_SUCCESS)
249 return NS_ERROR_HOST_IS_IP_ADDRESS;
251 // Walk up the domain tree, most specific to least specific,
252 // looking for matches at each level. Note that a given level may
253 // have multiple attributes (e.g. IsWild() and IsNormal()).
254 const char *prevDomain = nullptr;
255 const char *currDomain = aHostname.get();
256 const char *nextDot = strchr(currDomain, '.');
257 const char *end = currDomain + aHostname.Length();
258 const char *eTLD = currDomain;
259 while (1) {
260 // sanity check the string we're about to look up: it should not begin with
261 // a '.'; this would mean the hostname began with a '.' or had an
262 // embedded '..' sequence.
263 if (*currDomain == '.')
264 return NS_ERROR_INVALID_ARG;
266 // perform the hash lookup.
267 nsDomainEntry *entry = mHash.GetEntry(currDomain);
268 if (entry) {
269 if (entry->IsWild() && prevDomain) {
270 // wildcard rules imply an eTLD one level inferior to the match.
271 eTLD = prevDomain;
272 break;
274 } else if (entry->IsNormal() || !nextDot) {
275 // specific match, or we've hit the top domain level
276 eTLD = currDomain;
277 break;
279 } else if (entry->IsException()) {
280 // exception rules imply an eTLD one level superior to the match.
281 eTLD = nextDot + 1;
282 break;
283 }
284 }
286 if (!nextDot) {
287 // we've hit the top domain level; use it by default.
288 eTLD = currDomain;
289 break;
290 }
292 prevDomain = currDomain;
293 currDomain = nextDot + 1;
294 nextDot = strchr(currDomain, '.');
295 }
297 const char *begin, *iter;
298 if (aAdditionalParts < 0) {
299 NS_ASSERTION(aAdditionalParts == -1,
300 "aAdditionalParts can't be negative and different from -1");
302 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++);
304 if (iter != eTLD) {
305 iter++;
306 }
307 if (iter != eTLD) {
308 aAdditionalParts = 0;
309 }
310 } else {
311 // count off the number of requested domains.
312 begin = aHostname.get();
313 iter = eTLD;
315 while (1) {
316 if (iter == begin)
317 break;
319 if (*(--iter) == '.' && aAdditionalParts-- == 0) {
320 ++iter;
321 ++aAdditionalParts;
322 break;
323 }
324 }
325 }
327 if (aAdditionalParts != 0)
328 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
330 aBaseDomain = Substring(iter, end);
331 // add on the trailing dot, if applicable
332 if (trailingDot)
333 aBaseDomain.Append('.');
335 return NS_OK;
336 }
338 // Normalizes the given hostname, component by component. ASCII/ACE
339 // components are lower-cased, and UTF-8 components are normalized per
340 // RFC 3454 and converted to ACE.
341 nsresult
342 nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname)
343 {
344 if (!IsASCII(aHostname)) {
345 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
346 if (NS_FAILED(rv))
347 return rv;
348 }
350 ToLowerCase(aHostname);
351 return NS_OK;
352 }