|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
|
3 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 // This service reads a file of rules describing TLD-like domain names. For a |
|
8 // complete description of the expected file format and parsing rules, see |
|
9 // http://wiki.mozilla.org/Gecko:Effective_TLD_Service |
|
10 |
|
11 #include "mozilla/ArrayUtils.h" |
|
12 #include "mozilla/MemoryReporting.h" |
|
13 |
|
14 #include "nsEffectiveTLDService.h" |
|
15 #include "nsIIDNService.h" |
|
16 #include "nsNetUtil.h" |
|
17 #include "prnetdb.h" |
|
18 |
|
19 using namespace mozilla; |
|
20 |
|
21 NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService, |
|
22 nsIMemoryReporter) |
|
23 |
|
24 // ---------------------------------------------------------------------- |
|
25 |
|
// ETLD_STR_NUM(x) pastes "str" onto the fully macro-expanded value of x.  The
// extra ETLD_STR_NUM_1 level is required so that an argument such as __LINE__
// is replaced by its value before the ## operator is applied.
#define ETLD_STR_NUM_1(lineno) str##lineno
#define ETLD_STR_NUM(lineno) ETLD_STR_NUM_1(lineno)

// Byte offset, inside struct etld_string_list, of the member named
// str<__LINE__> for the line on which the macro is expanded.  The macro
// argument itself is not used.
#define ETLD_ENTRY_OFFSET(unusedName) \
  offsetof(struct etld_string_list, ETLD_STR_NUM(__LINE__))
|
29 |
|
30 const ETLDEntry nsDomainEntry::entries[] = { |
|
31 #define ETLD_ENTRY(name, ex, wild) { ETLD_ENTRY_OFFSET(name), ex, wild }, |
|
32 #include "etld_data.inc" |
|
33 #undef ETLD_ENTRY |
|
34 }; |
|
35 |
|
36 const union nsDomainEntry::etld_strings nsDomainEntry::strings = { |
|
37 { |
|
38 #define ETLD_ENTRY(name, ex, wild) name, |
|
39 #include "etld_data.inc" |
|
40 #undef ETLD_ENTRY |
|
41 } |
|
42 }; |
|
43 |
|
44 // Dummy function to statically ensure that our indices don't overflow |
|
45 // the storage provided for them. |
|
46 void |
|
47 nsDomainEntry::FuncForStaticAsserts(void) |
|
48 { |
|
49 #define ETLD_ENTRY(name, ex, wild) \ |
|
50 static_assert(ETLD_ENTRY_OFFSET(name) < (1 << ETLD_ENTRY_N_INDEX_BITS), \ |
|
51 "invalid strtab index"); |
|
52 #include "etld_data.inc" |
|
53 #undef ETLD_ENTRY |
|
54 } |
|
55 |
|
// The table-building helper macros are not needed past this point; drop all
// three so they cannot leak into the rest of the translation unit.  (The
// helper is spelled ETLD_STR_NUM_1, with an underscore before the digit.)
#undef ETLD_ENTRY_OFFSET
#undef ETLD_STR_NUM
#undef ETLD_STR_NUM_1
|
59 |
|
60 // ---------------------------------------------------------------------- |
|
61 |
|
62 static nsEffectiveTLDService *gService = nullptr; |
|
63 |
|
64 nsEffectiveTLDService::nsEffectiveTLDService() |
|
65 // We'll probably have to rehash at least once, since nsTHashtable doesn't |
|
66 // use a perfect hash, but at least we'll save a few rehashes along the way. |
|
67 // Next optimization here is to precompute the hash using something like |
|
68 // gperf, but one step at a time. :-) |
|
69 : mHash(ArrayLength(nsDomainEntry::entries)) |
|
70 { |
|
71 } |
|
72 |
|
73 nsresult |
|
74 nsEffectiveTLDService::Init() |
|
75 { |
|
76 const ETLDEntry *entries = nsDomainEntry::entries; |
|
77 |
|
78 nsresult rv; |
|
79 mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv); |
|
80 if (NS_FAILED(rv)) return rv; |
|
81 |
|
82 // Initialize eTLD hash from static array |
|
83 for (uint32_t i = 0; i < ArrayLength(nsDomainEntry::entries); i++) { |
|
84 const char *domain = nsDomainEntry::GetEffectiveTLDName(entries[i].strtab_index); |
|
85 #ifdef DEBUG |
|
86 nsDependentCString name(domain); |
|
87 nsAutoCString normalizedName(domain); |
|
88 NS_ASSERTION(NS_SUCCEEDED(NormalizeHostname(normalizedName)), |
|
89 "normalization failure!"); |
|
90 NS_ASSERTION(name.Equals(normalizedName), "domain not normalized!"); |
|
91 #endif |
|
92 nsDomainEntry *entry = mHash.PutEntry(domain); |
|
93 NS_ENSURE_TRUE(entry, NS_ERROR_OUT_OF_MEMORY); |
|
94 entry->SetData(&entries[i]); |
|
95 } |
|
96 |
|
97 MOZ_ASSERT(!gService); |
|
98 gService = this; |
|
99 RegisterWeakMemoryReporter(this); |
|
100 |
|
101 return NS_OK; |
|
102 } |
|
103 |
|
104 nsEffectiveTLDService::~nsEffectiveTLDService() |
|
105 { |
|
106 UnregisterWeakMemoryReporter(this); |
|
107 gService = nullptr; |
|
108 } |
|
109 |
|
110 MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf) |
|
111 |
|
112 NS_IMETHODIMP |
|
113 nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport, |
|
114 nsISupports* aData) |
|
115 { |
|
116 return MOZ_COLLECT_REPORT( |
|
117 "explicit/xpcom/effective-TLD-service", KIND_HEAP, UNITS_BYTES, |
|
118 SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf), |
|
119 "Memory used by the effective TLD service."); |
|
120 } |
|
121 |
|
122 size_t |
|
123 nsEffectiveTLDService::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) |
|
124 { |
|
125 size_t n = aMallocSizeOf(this); |
|
126 n += mHash.SizeOfExcludingThis(nullptr, aMallocSizeOf); |
|
127 |
|
128 // Measurement of the following members may be added later if DMD finds it is |
|
129 // worthwhile: |
|
130 // - mIDNService |
|
131 |
|
132 return n; |
|
133 } |
|
134 |
|
135 // External function for dealing with URI's correctly. |
|
136 // Pulls out the host portion from an nsIURI, and calls through to |
|
137 // GetPublicSuffixFromHost(). |
|
138 NS_IMETHODIMP |
|
139 nsEffectiveTLDService::GetPublicSuffix(nsIURI *aURI, |
|
140 nsACString &aPublicSuffix) |
|
141 { |
|
142 NS_ENSURE_ARG_POINTER(aURI); |
|
143 |
|
144 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); |
|
145 NS_ENSURE_ARG_POINTER(innerURI); |
|
146 |
|
147 nsAutoCString host; |
|
148 nsresult rv = innerURI->GetAsciiHost(host); |
|
149 if (NS_FAILED(rv)) return rv; |
|
150 |
|
151 return GetBaseDomainInternal(host, 0, aPublicSuffix); |
|
152 } |
|
153 |
|
154 // External function for dealing with URI's correctly. |
|
155 // Pulls out the host portion from an nsIURI, and calls through to |
|
156 // GetBaseDomainFromHost(). |
|
157 NS_IMETHODIMP |
|
158 nsEffectiveTLDService::GetBaseDomain(nsIURI *aURI, |
|
159 uint32_t aAdditionalParts, |
|
160 nsACString &aBaseDomain) |
|
161 { |
|
162 NS_ENSURE_ARG_POINTER(aURI); |
|
163 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); |
|
164 |
|
165 nsCOMPtr<nsIURI> innerURI = NS_GetInnermostURI(aURI); |
|
166 NS_ENSURE_ARG_POINTER(innerURI); |
|
167 |
|
168 nsAutoCString host; |
|
169 nsresult rv = innerURI->GetAsciiHost(host); |
|
170 if (NS_FAILED(rv)) return rv; |
|
171 |
|
172 return GetBaseDomainInternal(host, aAdditionalParts + 1, aBaseDomain); |
|
173 } |
|
174 |
|
175 // External function for dealing with a host string directly: finds the public |
|
176 // suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal(). |
|
177 NS_IMETHODIMP |
|
178 nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString &aHostname, |
|
179 nsACString &aPublicSuffix) |
|
180 { |
|
181 // Create a mutable copy of the hostname and normalize it to ACE. |
|
182 // This will fail if the hostname includes invalid characters. |
|
183 nsAutoCString normHostname(aHostname); |
|
184 nsresult rv = NormalizeHostname(normHostname); |
|
185 if (NS_FAILED(rv)) return rv; |
|
186 |
|
187 return GetBaseDomainInternal(normHostname, 0, aPublicSuffix); |
|
188 } |
|
189 |
|
190 // External function for dealing with a host string directly: finds the base |
|
191 // domain (e.g. www.co.uk) for the given hostname and number of subdomain parts |
|
192 // requested. See GetBaseDomainInternal(). |
|
193 NS_IMETHODIMP |
|
194 nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString &aHostname, |
|
195 uint32_t aAdditionalParts, |
|
196 nsACString &aBaseDomain) |
|
197 { |
|
198 NS_ENSURE_TRUE( ((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG); |
|
199 |
|
200 // Create a mutable copy of the hostname and normalize it to ACE. |
|
201 // This will fail if the hostname includes invalid characters. |
|
202 nsAutoCString normHostname(aHostname); |
|
203 nsresult rv = NormalizeHostname(normHostname); |
|
204 if (NS_FAILED(rv)) return rv; |
|
205 |
|
206 return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, aBaseDomain); |
|
207 } |
|
208 |
|
209 NS_IMETHODIMP |
|
210 nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname, |
|
211 nsACString& aBaseDomain) |
|
212 { |
|
213 // Create a mutable copy of the hostname and normalize it to ACE. |
|
214 // This will fail if the hostname includes invalid characters. |
|
215 nsAutoCString normHostname(aHostname); |
|
216 nsresult rv = NormalizeHostname(normHostname); |
|
217 NS_ENSURE_SUCCESS(rv, rv); |
|
218 |
|
219 return GetBaseDomainInternal(normHostname, -1, aBaseDomain); |
|
220 } |
|
221 |
|
222 // Finds the base domain for a host, with requested number of additional parts. |
|
223 // This will fail, generating an error, if the host is an IPv4/IPv6 address, |
|
224 // if more subdomain parts are requested than are available, or if the hostname |
|
225 // includes characters that are not valid in a URL. Normalization is performed |
|
226 // on the host string and the result will be in UTF8. |
|
227 nsresult |
|
228 nsEffectiveTLDService::GetBaseDomainInternal(nsCString &aHostname, |
|
229 int32_t aAdditionalParts, |
|
230 nsACString &aBaseDomain) |
|
231 { |
|
232 if (aHostname.IsEmpty()) |
|
233 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; |
|
234 |
|
235 // chomp any trailing dot, and keep track of it for later |
|
236 bool trailingDot = aHostname.Last() == '.'; |
|
237 if (trailingDot) |
|
238 aHostname.Truncate(aHostname.Length() - 1); |
|
239 |
|
240 // check the edge cases of the host being '.' or having a second trailing '.', |
|
241 // since subsequent checks won't catch it. |
|
242 if (aHostname.IsEmpty() || aHostname.Last() == '.') |
|
243 return NS_ERROR_INVALID_ARG; |
|
244 |
|
245 // Check if we're dealing with an IPv4/IPv6 hostname, and return |
|
246 PRNetAddr addr; |
|
247 PRStatus result = PR_StringToNetAddr(aHostname.get(), &addr); |
|
248 if (result == PR_SUCCESS) |
|
249 return NS_ERROR_HOST_IS_IP_ADDRESS; |
|
250 |
|
251 // Walk up the domain tree, most specific to least specific, |
|
252 // looking for matches at each level. Note that a given level may |
|
253 // have multiple attributes (e.g. IsWild() and IsNormal()). |
|
254 const char *prevDomain = nullptr; |
|
255 const char *currDomain = aHostname.get(); |
|
256 const char *nextDot = strchr(currDomain, '.'); |
|
257 const char *end = currDomain + aHostname.Length(); |
|
258 const char *eTLD = currDomain; |
|
259 while (1) { |
|
260 // sanity check the string we're about to look up: it should not begin with |
|
261 // a '.'; this would mean the hostname began with a '.' or had an |
|
262 // embedded '..' sequence. |
|
263 if (*currDomain == '.') |
|
264 return NS_ERROR_INVALID_ARG; |
|
265 |
|
266 // perform the hash lookup. |
|
267 nsDomainEntry *entry = mHash.GetEntry(currDomain); |
|
268 if (entry) { |
|
269 if (entry->IsWild() && prevDomain) { |
|
270 // wildcard rules imply an eTLD one level inferior to the match. |
|
271 eTLD = prevDomain; |
|
272 break; |
|
273 |
|
274 } else if (entry->IsNormal() || !nextDot) { |
|
275 // specific match, or we've hit the top domain level |
|
276 eTLD = currDomain; |
|
277 break; |
|
278 |
|
279 } else if (entry->IsException()) { |
|
280 // exception rules imply an eTLD one level superior to the match. |
|
281 eTLD = nextDot + 1; |
|
282 break; |
|
283 } |
|
284 } |
|
285 |
|
286 if (!nextDot) { |
|
287 // we've hit the top domain level; use it by default. |
|
288 eTLD = currDomain; |
|
289 break; |
|
290 } |
|
291 |
|
292 prevDomain = currDomain; |
|
293 currDomain = nextDot + 1; |
|
294 nextDot = strchr(currDomain, '.'); |
|
295 } |
|
296 |
|
297 const char *begin, *iter; |
|
298 if (aAdditionalParts < 0) { |
|
299 NS_ASSERTION(aAdditionalParts == -1, |
|
300 "aAdditionalParts can't be negative and different from -1"); |
|
301 |
|
302 for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++); |
|
303 |
|
304 if (iter != eTLD) { |
|
305 iter++; |
|
306 } |
|
307 if (iter != eTLD) { |
|
308 aAdditionalParts = 0; |
|
309 } |
|
310 } else { |
|
311 // count off the number of requested domains. |
|
312 begin = aHostname.get(); |
|
313 iter = eTLD; |
|
314 |
|
315 while (1) { |
|
316 if (iter == begin) |
|
317 break; |
|
318 |
|
319 if (*(--iter) == '.' && aAdditionalParts-- == 0) { |
|
320 ++iter; |
|
321 ++aAdditionalParts; |
|
322 break; |
|
323 } |
|
324 } |
|
325 } |
|
326 |
|
327 if (aAdditionalParts != 0) |
|
328 return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS; |
|
329 |
|
330 aBaseDomain = Substring(iter, end); |
|
331 // add on the trailing dot, if applicable |
|
332 if (trailingDot) |
|
333 aBaseDomain.Append('.'); |
|
334 |
|
335 return NS_OK; |
|
336 } |
|
337 |
|
338 // Normalizes the given hostname, component by component. ASCII/ACE |
|
339 // components are lower-cased, and UTF-8 components are normalized per |
|
340 // RFC 3454 and converted to ACE. |
|
341 nsresult |
|
342 nsEffectiveTLDService::NormalizeHostname(nsCString &aHostname) |
|
343 { |
|
344 if (!IsASCII(aHostname)) { |
|
345 nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname); |
|
346 if (NS_FAILED(rv)) |
|
347 return rv; |
|
348 } |
|
349 |
|
350 ToLowerCase(aHostname); |
|
351 return NS_OK; |
|
352 } |