netwerk/dns/nsIDNService.cpp

Wed, 31 Dec 2014 06:55:46 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:55:46 +0100
changeset 1
ca08bd8f51b2
permissions
-rw-r--r--

Added tag TORBROWSER_REPLICA for changeset 6474c204b198

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 #include "nsIDNService.h"
michael@0 7 #include "nsReadableUtils.h"
michael@0 8 #include "nsCRT.h"
michael@0 9 #include "nsUnicharUtils.h"
michael@0 10 #include "nsUnicodeProperties.h"
michael@0 11 #include "nsUnicodeScriptCodes.h"
michael@0 12 #include "harfbuzz/hb.h"
michael@0 13 #include "nsIServiceManager.h"
michael@0 14 #include "nsIPrefService.h"
michael@0 15 #include "nsIPrefBranch.h"
michael@0 16 #include "nsIObserverService.h"
michael@0 17 #include "nsISupportsPrimitives.h"
michael@0 18 #include "punycode.h"
michael@0 19
michael@0 20
michael@0 21 using namespace mozilla::unicode;
michael@0 22
//-----------------------------------------------------------------------------
// Maximum length of a single DNS node (label).
// RFC 1034 - 3.1. Name space specifications and terminology
static const uint32_t kMaxDNSNodeLen = 63;

//-----------------------------------------------------------------------------
// Names of the preferences consulted by nsIDNService (see prefsChanged()).

// bool: when set, UTF8toACE() inserts the extra "mltbd." test-bed node
#define NS_NET_PREF_IDNTESTBED      "network.IDN_testbed"
// string: ACE prefix to use instead of the built-in default
#define NS_NET_PREF_IDNPREFIX       "network.IDN_prefix"
// string: characters that must not appear in a displayed host name
#define NS_NET_PREF_IDNBLACKLIST    "network.IDN.blacklist_chars"
// bool: always present host names in their ACE form
#define NS_NET_PREF_SHOWPUNYCODE    "network.IDN_show_punycode"
// branch root: one bool pref per TLD, looked up by isInWhitelist()
#define NS_NET_PREF_IDNWHITELIST    "network.IDN.whitelist."
// bool: whether the TLD whitelist is consulted at all
#define NS_NET_PREF_IDNUSEWHITELIST "network.IDN.use_whitelist"
// string: "moderate" or "high"; any other value selects the ASCII-only profile
#define NS_NET_PREF_IDNRESTRICTION  "network.IDN.restriction_profile"
michael@0 36
michael@0 37 inline bool isOnlySafeChars(const nsAFlatString& in,
michael@0 38 const nsAFlatString& blacklist)
michael@0 39 {
michael@0 40 return (blacklist.IsEmpty() ||
michael@0 41 in.FindCharInSet(blacklist) == kNotFound);
michael@0 42 }
michael@0 43
michael@0 44 //-----------------------------------------------------------------------------
michael@0 45 // nsIDNService
michael@0 46 //-----------------------------------------------------------------------------
michael@0 47
michael@0 48 /* Implementation file */
michael@0 49 NS_IMPL_ISUPPORTS(nsIDNService,
michael@0 50 nsIIDNService,
michael@0 51 nsIObserver,
michael@0 52 nsISupportsWeakReference)
michael@0 53
michael@0 54 nsresult nsIDNService::Init()
michael@0 55 {
michael@0 56 nsCOMPtr<nsIPrefService> prefs(do_GetService(NS_PREFSERVICE_CONTRACTID));
michael@0 57 if (prefs)
michael@0 58 prefs->GetBranch(NS_NET_PREF_IDNWHITELIST, getter_AddRefs(mIDNWhitelistPrefBranch));
michael@0 59
michael@0 60 nsCOMPtr<nsIPrefBranch> prefInternal(do_QueryInterface(prefs));
michael@0 61 if (prefInternal) {
michael@0 62 prefInternal->AddObserver(NS_NET_PREF_IDNTESTBED, this, true);
michael@0 63 prefInternal->AddObserver(NS_NET_PREF_IDNPREFIX, this, true);
michael@0 64 prefInternal->AddObserver(NS_NET_PREF_IDNBLACKLIST, this, true);
michael@0 65 prefInternal->AddObserver(NS_NET_PREF_SHOWPUNYCODE, this, true);
michael@0 66 prefInternal->AddObserver(NS_NET_PREF_IDNRESTRICTION, this, true);
michael@0 67 prefInternal->AddObserver(NS_NET_PREF_IDNUSEWHITELIST, this, true);
michael@0 68 prefsChanged(prefInternal, nullptr);
michael@0 69 }
michael@0 70
michael@0 71 return NS_OK;
michael@0 72 }
michael@0 73
michael@0 74 NS_IMETHODIMP nsIDNService::Observe(nsISupports *aSubject,
michael@0 75 const char *aTopic,
michael@0 76 const char16_t *aData)
michael@0 77 {
michael@0 78 if (!strcmp(aTopic, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID)) {
michael@0 79 nsCOMPtr<nsIPrefBranch> prefBranch( do_QueryInterface(aSubject) );
michael@0 80 if (prefBranch)
michael@0 81 prefsChanged(prefBranch, aData);
michael@0 82 }
michael@0 83 return NS_OK;
michael@0 84 }
michael@0 85
michael@0 86 void nsIDNService::prefsChanged(nsIPrefBranch *prefBranch, const char16_t *pref)
michael@0 87 {
michael@0 88 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNTESTBED).Equals(pref)) {
michael@0 89 bool val;
michael@0 90 if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_IDNTESTBED, &val)))
michael@0 91 mMultilingualTestBed = val;
michael@0 92 }
michael@0 93 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNPREFIX).Equals(pref)) {
michael@0 94 nsXPIDLCString prefix;
michael@0 95 nsresult rv = prefBranch->GetCharPref(NS_NET_PREF_IDNPREFIX, getter_Copies(prefix));
michael@0 96 if (NS_SUCCEEDED(rv) && prefix.Length() <= kACEPrefixLen)
michael@0 97 PL_strncpyz(nsIDNService::mACEPrefix, prefix.get(), kACEPrefixLen + 1);
michael@0 98 }
michael@0 99 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNBLACKLIST).Equals(pref)) {
michael@0 100 nsCOMPtr<nsISupportsString> blacklist;
michael@0 101 nsresult rv = prefBranch->GetComplexValue(NS_NET_PREF_IDNBLACKLIST,
michael@0 102 NS_GET_IID(nsISupportsString),
michael@0 103 getter_AddRefs(blacklist));
michael@0 104 if (NS_SUCCEEDED(rv))
michael@0 105 blacklist->ToString(getter_Copies(mIDNBlacklist));
michael@0 106 else
michael@0 107 mIDNBlacklist.Truncate();
michael@0 108 }
michael@0 109 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_SHOWPUNYCODE).Equals(pref)) {
michael@0 110 bool val;
michael@0 111 if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_SHOWPUNYCODE, &val)))
michael@0 112 mShowPunycode = val;
michael@0 113 }
michael@0 114 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNUSEWHITELIST).Equals(pref)) {
michael@0 115 bool val;
michael@0 116 if (NS_SUCCEEDED(prefBranch->GetBoolPref(NS_NET_PREF_IDNUSEWHITELIST,
michael@0 117 &val)))
michael@0 118 mIDNUseWhitelist = val;
michael@0 119 }
michael@0 120 if (!pref || NS_LITERAL_STRING(NS_NET_PREF_IDNRESTRICTION).Equals(pref)) {
michael@0 121 nsXPIDLCString profile;
michael@0 122 if (NS_FAILED(prefBranch->GetCharPref(NS_NET_PREF_IDNRESTRICTION,
michael@0 123 getter_Copies(profile)))) {
michael@0 124 profile.Truncate();
michael@0 125 }
michael@0 126 if (profile.Equals(NS_LITERAL_CSTRING("moderate"))) {
michael@0 127 mRestrictionProfile = eModeratelyRestrictiveProfile;
michael@0 128 } else if (profile.Equals(NS_LITERAL_CSTRING("high"))) {
michael@0 129 mRestrictionProfile = eHighlyRestrictiveProfile;
michael@0 130 } else {
michael@0 131 mRestrictionProfile = eASCIIOnlyProfile;
michael@0 132 }
michael@0 133 }
michael@0 134 }
michael@0 135
michael@0 136 nsIDNService::nsIDNService()
michael@0 137 {
michael@0 138 // initialize to the official prefix (RFC 3490 "5. ACE prefix")
michael@0 139 const char kIDNSPrefix[] = "xn--";
michael@0 140 strcpy(mACEPrefix, kIDNSPrefix);
michael@0 141
michael@0 142 mMultilingualTestBed = false;
michael@0 143
michael@0 144 if (idn_success != idn_nameprep_create(nullptr, &mNamePrepHandle))
michael@0 145 mNamePrepHandle = nullptr;
michael@0 146
michael@0 147 mNormalizer = do_GetService(NS_UNICODE_NORMALIZER_CONTRACTID);
michael@0 148 /* member initializers and constructor code */
michael@0 149 }
michael@0 150
michael@0 151 nsIDNService::~nsIDNService()
michael@0 152 {
michael@0 153 idn_nameprep_destroy(mNamePrepHandle);
michael@0 154 }
michael@0 155
michael@0 156 /* ACString ConvertUTF8toACE (in AUTF8String input); */
michael@0 157 NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString & input, nsACString & ace)
michael@0 158 {
michael@0 159 return UTF8toACE(input, ace, true, true);
michael@0 160 }
michael@0 161
michael@0 162 nsresult nsIDNService::SelectiveUTF8toACE(const nsACString& input, nsACString& ace)
michael@0 163 {
michael@0 164 return UTF8toACE(input, ace, true, false);
michael@0 165 }
michael@0 166
michael@0 167 nsresult nsIDNService::UTF8toACE(const nsACString & input, nsACString & ace, bool allowUnassigned, bool convertAllLabels)
michael@0 168 {
michael@0 169 nsresult rv;
michael@0 170 NS_ConvertUTF8toUTF16 ustr(input);
michael@0 171
michael@0 172 // map ideographic period to ASCII period etc.
michael@0 173 normalizeFullStops(ustr);
michael@0 174
michael@0 175
michael@0 176 uint32_t len, offset;
michael@0 177 len = 0;
michael@0 178 offset = 0;
michael@0 179 nsAutoCString encodedBuf;
michael@0 180
michael@0 181 nsAString::const_iterator start, end;
michael@0 182 ustr.BeginReading(start);
michael@0 183 ustr.EndReading(end);
michael@0 184 ace.Truncate();
michael@0 185
michael@0 186 // encode nodes if non ASCII
michael@0 187 while (start != end) {
michael@0 188 len++;
michael@0 189 if (*start++ == (char16_t)'.') {
michael@0 190 rv = stringPrepAndACE(Substring(ustr, offset, len - 1), encodedBuf,
michael@0 191 allowUnassigned, convertAllLabels);
michael@0 192 NS_ENSURE_SUCCESS(rv, rv);
michael@0 193
michael@0 194 ace.Append(encodedBuf);
michael@0 195 ace.Append('.');
michael@0 196 offset += len;
michael@0 197 len = 0;
michael@0 198 }
michael@0 199 }
michael@0 200
michael@0 201 // add extra node for multilingual test bed
michael@0 202 if (mMultilingualTestBed)
michael@0 203 ace.AppendLiteral("mltbd.");
michael@0 204 // encode the last node if non ASCII
michael@0 205 if (len) {
michael@0 206 rv = stringPrepAndACE(Substring(ustr, offset, len), encodedBuf,
michael@0 207 allowUnassigned, convertAllLabels);
michael@0 208 NS_ENSURE_SUCCESS(rv, rv);
michael@0 209
michael@0 210 ace.Append(encodedBuf);
michael@0 211 }
michael@0 212
michael@0 213 return NS_OK;
michael@0 214 }
michael@0 215
michael@0 216 /* AUTF8String convertACEtoUTF8(in ACString input); */
michael@0 217 NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString & input, nsACString & _retval)
michael@0 218 {
michael@0 219 return ACEtoUTF8(input, _retval, true, true);
michael@0 220 }
michael@0 221
michael@0 222 nsresult nsIDNService::SelectiveACEtoUTF8(const nsACString& input, nsACString& _retval)
michael@0 223 {
michael@0 224 return ACEtoUTF8(input, _retval, false, false);
michael@0 225 }
michael@0 226
michael@0 227 nsresult nsIDNService::ACEtoUTF8(const nsACString & input, nsACString & _retval,
michael@0 228 bool allowUnassigned, bool convertAllLabels)
michael@0 229 {
michael@0 230 // RFC 3490 - 4.2 ToUnicode
michael@0 231 // ToUnicode never fails. If any step fails, then the original input
michael@0 232 // sequence is returned immediately in that step.
michael@0 233
michael@0 234 uint32_t len = 0, offset = 0;
michael@0 235 nsAutoCString decodedBuf;
michael@0 236
michael@0 237 nsACString::const_iterator start, end;
michael@0 238 input.BeginReading(start);
michael@0 239 input.EndReading(end);
michael@0 240 _retval.Truncate();
michael@0 241
michael@0 242 // loop and decode nodes
michael@0 243 while (start != end) {
michael@0 244 len++;
michael@0 245 if (*start++ == '.') {
michael@0 246 if (NS_FAILED(decodeACE(Substring(input, offset, len - 1), decodedBuf,
michael@0 247 allowUnassigned, convertAllLabels))) {
michael@0 248 _retval.Assign(input);
michael@0 249 return NS_OK;
michael@0 250 }
michael@0 251
michael@0 252 _retval.Append(decodedBuf);
michael@0 253 _retval.Append('.');
michael@0 254 offset += len;
michael@0 255 len = 0;
michael@0 256 }
michael@0 257 }
michael@0 258 // decode the last node
michael@0 259 if (len) {
michael@0 260 if (NS_FAILED(decodeACE(Substring(input, offset, len), decodedBuf,
michael@0 261 allowUnassigned, convertAllLabels)))
michael@0 262 _retval.Assign(input);
michael@0 263 else
michael@0 264 _retval.Append(decodedBuf);
michael@0 265 }
michael@0 266
michael@0 267 return NS_OK;
michael@0 268 }
michael@0 269
michael@0 270 /* boolean isACE(in ACString input); */
michael@0 271 NS_IMETHODIMP nsIDNService::IsACE(const nsACString & input, bool *_retval)
michael@0 272 {
michael@0 273 nsACString::const_iterator begin;
michael@0 274 input.BeginReading(begin);
michael@0 275
michael@0 276 const char *data = begin.get();
michael@0 277 uint32_t dataLen = begin.size_forward();
michael@0 278
michael@0 279 // look for the ACE prefix in the input string. it may occur
michael@0 280 // at the beginning of any segment in the domain name. for
michael@0 281 // example: "www.xn--ENCODED.com"
michael@0 282
michael@0 283 const char *p = PL_strncasestr(data, mACEPrefix, dataLen);
michael@0 284
michael@0 285 *_retval = p && (p == data || *(p - 1) == '.');
michael@0 286 return NS_OK;
michael@0 287 }
michael@0 288
michael@0 289 /* AUTF8String normalize(in AUTF8String input); */
michael@0 290 NS_IMETHODIMP nsIDNService::Normalize(const nsACString & input, nsACString & output)
michael@0 291 {
michael@0 292 // protect against bogus input
michael@0 293 NS_ENSURE_TRUE(IsUTF8(input), NS_ERROR_UNEXPECTED);
michael@0 294
michael@0 295 NS_ConvertUTF8toUTF16 inUTF16(input);
michael@0 296 normalizeFullStops(inUTF16);
michael@0 297
michael@0 298 // pass the domain name to stringprep label by label
michael@0 299 nsAutoString outUTF16, outLabel;
michael@0 300
michael@0 301 uint32_t len = 0, offset = 0;
michael@0 302 nsresult rv;
michael@0 303 nsAString::const_iterator start, end;
michael@0 304 inUTF16.BeginReading(start);
michael@0 305 inUTF16.EndReading(end);
michael@0 306
michael@0 307 while (start != end) {
michael@0 308 len++;
michael@0 309 if (*start++ == char16_t('.')) {
michael@0 310 rv = stringPrep(Substring(inUTF16, offset, len - 1), outLabel, true);
michael@0 311 NS_ENSURE_SUCCESS(rv, rv);
michael@0 312
michael@0 313 outUTF16.Append(outLabel);
michael@0 314 outUTF16.Append(char16_t('.'));
michael@0 315 offset += len;
michael@0 316 len = 0;
michael@0 317 }
michael@0 318 }
michael@0 319 if (len) {
michael@0 320 rv = stringPrep(Substring(inUTF16, offset, len), outLabel, true);
michael@0 321 NS_ENSURE_SUCCESS(rv, rv);
michael@0 322
michael@0 323 outUTF16.Append(outLabel);
michael@0 324 }
michael@0 325
michael@0 326 CopyUTF16toUTF8(outUTF16, output);
michael@0 327 if (!isOnlySafeChars(outUTF16, mIDNBlacklist))
michael@0 328 return ConvertUTF8toACE(output, output);
michael@0 329
michael@0 330 return NS_OK;
michael@0 331 }
michael@0 332
michael@0 333 NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString & input, bool * _isASCII, nsACString & _retval)
michael@0 334 {
michael@0 335 // If host is ACE, then convert to UTF-8 if the host is in the IDN whitelist.
michael@0 336 // Else, if host is already UTF-8, then make sure it is normalized per IDN.
michael@0 337
michael@0 338 nsresult rv = NS_OK;
michael@0 339
michael@0 340 // Even if the hostname is not ASCII, individual labels may still be ACE, so
michael@0 341 // test IsACE before testing IsASCII
michael@0 342 bool isACE;
michael@0 343 IsACE(input, &isACE);
michael@0 344
michael@0 345 if (IsASCII(input)) {
michael@0 346 // first, canonicalize the host to lowercase, for whitelist lookup
michael@0 347 _retval = input;
michael@0 348 ToLowerCase(_retval);
michael@0 349
michael@0 350 if (isACE && !mShowPunycode) {
michael@0 351 // ACEtoUTF8() can't fail, but might return the original ACE string
michael@0 352 nsAutoCString temp(_retval);
michael@0 353 if (isInWhitelist(temp)) {
michael@0 354 // If the domain is in the whitelist, return the host in UTF-8
michael@0 355 ACEtoUTF8(temp, _retval, false, true);
michael@0 356 } else {
michael@0 357 // Otherwise convert from ACE to UTF8 only those labels which are
michael@0 358 // considered safe for display
michael@0 359 SelectiveACEtoUTF8(temp, _retval);
michael@0 360 }
michael@0 361 *_isASCII = IsASCII(_retval);
michael@0 362 } else {
michael@0 363 *_isASCII = true;
michael@0 364 }
michael@0 365 } else {
michael@0 366 // We have to normalize the hostname before testing against the domain
michael@0 367 // whitelist (see bug 315411), and to ensure the entire string gets
michael@0 368 // normalized.
michael@0 369 //
michael@0 370 // Normalization and the tests for safe display below, assume that the
michael@0 371 // input is Unicode, so first convert any ACE labels to UTF8
michael@0 372 if (isACE) {
michael@0 373 nsAutoCString temp;
michael@0 374 ACEtoUTF8(input, temp, false, true);
michael@0 375 rv = Normalize(temp, _retval);
michael@0 376 } else {
michael@0 377 rv = Normalize(input, _retval);
michael@0 378 }
michael@0 379 if (NS_FAILED(rv)) return rv;
michael@0 380
michael@0 381 if (mShowPunycode && NS_SUCCEEDED(ConvertUTF8toACE(_retval, _retval))) {
michael@0 382 *_isASCII = true;
michael@0 383 return NS_OK;
michael@0 384 }
michael@0 385
michael@0 386 // normalization could result in an ASCII-only hostname. alternatively, if
michael@0 387 // the host is converted to ACE by the normalizer, then the host may contain
michael@0 388 // unsafe characters, so leave it ACE encoded. see bug 283016, bug 301694, and bug 309311.
michael@0 389 *_isASCII = IsASCII(_retval);
michael@0 390 if (!*_isASCII && !isInWhitelist(_retval)) {
michael@0 391 // SelectiveUTF8toACE may return a domain name where some labels are in UTF-8
michael@0 392 // and some are in ACE, depending on whether they are considered safe for
michael@0 393 // display
michael@0 394 rv = SelectiveUTF8toACE(_retval, _retval);
michael@0 395 *_isASCII = IsASCII(_retval);
michael@0 396 return rv;
michael@0 397 }
michael@0 398 }
michael@0 399
michael@0 400 return NS_OK;
michael@0 401 }
michael@0 402
michael@0 403 //-----------------------------------------------------------------------------
michael@0 404
michael@0 405 static nsresult utf16ToUcs4(const nsAString& in,
michael@0 406 uint32_t *out,
michael@0 407 uint32_t outBufLen,
michael@0 408 uint32_t *outLen)
michael@0 409 {
michael@0 410 uint32_t i = 0;
michael@0 411 nsAString::const_iterator start, end;
michael@0 412 in.BeginReading(start);
michael@0 413 in.EndReading(end);
michael@0 414
michael@0 415 while (start != end) {
michael@0 416 char16_t curChar;
michael@0 417
michael@0 418 curChar= *start++;
michael@0 419
michael@0 420 if (start != end &&
michael@0 421 NS_IS_HIGH_SURROGATE(curChar) &&
michael@0 422 NS_IS_LOW_SURROGATE(*start)) {
michael@0 423 out[i] = SURROGATE_TO_UCS4(curChar, *start);
michael@0 424 ++start;
michael@0 425 }
michael@0 426 else
michael@0 427 out[i] = curChar;
michael@0 428
michael@0 429 i++;
michael@0 430 if (i >= outBufLen)
michael@0 431 return NS_ERROR_FAILURE;
michael@0 432 }
michael@0 433 out[i] = (uint32_t)'\0';
michael@0 434 *outLen = i;
michael@0 435 return NS_OK;
michael@0 436 }
michael@0 437
michael@0 438 static void ucs4toUtf16(const uint32_t *in, nsAString& out)
michael@0 439 {
michael@0 440 while (*in) {
michael@0 441 if (!IS_IN_BMP(*in)) {
michael@0 442 out.Append((char16_t) H_SURROGATE(*in));
michael@0 443 out.Append((char16_t) L_SURROGATE(*in));
michael@0 444 }
michael@0 445 else
michael@0 446 out.Append((char16_t) *in);
michael@0 447 in++;
michael@0 448 }
michael@0 449 }
michael@0 450
michael@0 451 static nsresult punycode(const char* prefix, const nsAString& in, nsACString& out)
michael@0 452 {
michael@0 453 uint32_t ucs4Buf[kMaxDNSNodeLen + 1];
michael@0 454 uint32_t ucs4Len;
michael@0 455 nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
michael@0 456 NS_ENSURE_SUCCESS(rv, rv);
michael@0 457
michael@0 458 // need maximum 20 bits to encode 16 bit Unicode character
michael@0 459 // (include null terminator)
michael@0 460 const uint32_t kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
michael@0 461 char encodedBuf[kEncodedBufSize];
michael@0 462 punycode_uint encodedLength = kEncodedBufSize;
michael@0 463
michael@0 464 enum punycode_status status = punycode_encode(ucs4Len,
michael@0 465 ucs4Buf,
michael@0 466 nullptr,
michael@0 467 &encodedLength,
michael@0 468 encodedBuf);
michael@0 469
michael@0 470 if (punycode_success != status ||
michael@0 471 encodedLength >= kEncodedBufSize)
michael@0 472 return NS_ERROR_FAILURE;
michael@0 473
michael@0 474 encodedBuf[encodedLength] = '\0';
michael@0 475 out.Assign(nsDependentCString(prefix) + nsDependentCString(encodedBuf));
michael@0 476
michael@0 477 return rv;
michael@0 478 }
michael@0 479
michael@0 480 static nsresult encodeToRACE(const char* prefix, const nsAString& in, nsACString& out)
michael@0 481 {
michael@0 482 // need maximum 20 bits to encode 16 bit Unicode character
michael@0 483 // (include null terminator)
michael@0 484 const uint32_t kEncodedBufSize = kMaxDNSNodeLen * 20 / 8 + 1 + 1;
michael@0 485
michael@0 486 // set up a work buffer for RACE encoder
michael@0 487 char16_t temp[kMaxDNSNodeLen + 2];
michael@0 488 temp[0] = 0xFFFF; // set a place holder (to be filled by get_compress_mode)
michael@0 489 temp[in.Length() + 1] = (char16_t)'\0';
michael@0 490
michael@0 491 nsAString::const_iterator start, end;
michael@0 492 in.BeginReading(start);
michael@0 493 in.EndReading(end);
michael@0 494
michael@0 495 for (uint32_t i = 1; start != end; i++)
michael@0 496 temp[i] = *start++;
michael@0 497
michael@0 498 // encode nodes if non ASCII
michael@0 499
michael@0 500 char encodedBuf[kEncodedBufSize];
michael@0 501 idn_result_t result = race_compress_encode((const unsigned short *) temp,
michael@0 502 get_compress_mode((unsigned short *) temp + 1),
michael@0 503 encodedBuf, kEncodedBufSize);
michael@0 504 if (idn_success != result)
michael@0 505 return NS_ERROR_FAILURE;
michael@0 506
michael@0 507 out.Assign(prefix);
michael@0 508 out.Append(encodedBuf);
michael@0 509
michael@0 510 return NS_OK;
michael@0 511 }
michael@0 512
michael@0 513 // RFC 3454
michael@0 514 //
michael@0 515 // 1) Map -- For each character in the input, check if it has a mapping
michael@0 516 // and, if so, replace it with its mapping. This is described in section 3.
michael@0 517 //
michael@0 518 // 2) Normalize -- Possibly normalize the result of step 1 using Unicode
michael@0 519 // normalization. This is described in section 4.
michael@0 520 //
michael@0 521 // 3) Prohibit -- Check for any characters that are not allowed in the
michael@0 522 // output. If any are found, return an error. This is described in section
michael@0 523 // 5.
michael@0 524 //
michael@0 525 // 4) Check bidi -- Possibly check for right-to-left characters, and if any
michael@0 526 // are found, make sure that the whole string satisfies the requirements
michael@0 527 // for bidirectional strings. If the string does not satisfy the requirements
michael@0 528 // for bidirectional strings, return an error. This is described in section 6.
michael@0 529 //
michael@0 530 // 5) Check unassigned code points -- If allowUnassigned is false, check for
michael@0 531 // any unassigned Unicode points and if any are found return an error.
michael@0 532 // This is described in section 7.
michael@0 533 //
michael@0 534 nsresult nsIDNService::stringPrep(const nsAString& in, nsAString& out,
michael@0 535 bool allowUnassigned)
michael@0 536 {
michael@0 537 if (!mNamePrepHandle || !mNormalizer)
michael@0 538 return NS_ERROR_FAILURE;
michael@0 539
michael@0 540 uint32_t ucs4Buf[kMaxDNSNodeLen + 1];
michael@0 541 uint32_t ucs4Len;
michael@0 542 nsresult rv = utf16ToUcs4(in, ucs4Buf, kMaxDNSNodeLen, &ucs4Len);
michael@0 543 NS_ENSURE_SUCCESS(rv, rv);
michael@0 544
michael@0 545 // map
michael@0 546 idn_result_t idn_err;
michael@0 547
michael@0 548 uint32_t namePrepBuf[kMaxDNSNodeLen * 3]; // map up to three characters
michael@0 549 idn_err = idn_nameprep_map(mNamePrepHandle, (const uint32_t *) ucs4Buf,
michael@0 550 (uint32_t *) namePrepBuf, kMaxDNSNodeLen * 3);
michael@0 551 NS_ENSURE_TRUE(idn_err == idn_success, NS_ERROR_FAILURE);
michael@0 552
michael@0 553 nsAutoString namePrepStr;
michael@0 554 ucs4toUtf16(namePrepBuf, namePrepStr);
michael@0 555 if (namePrepStr.Length() >= kMaxDNSNodeLen)
michael@0 556 return NS_ERROR_FAILURE;
michael@0 557
michael@0 558 // normalize
michael@0 559 nsAutoString normlizedStr;
michael@0 560 rv = mNormalizer->NormalizeUnicodeNFKC(namePrepStr, normlizedStr);
michael@0 561 if (normlizedStr.Length() >= kMaxDNSNodeLen)
michael@0 562 return NS_ERROR_FAILURE;
michael@0 563
michael@0 564 // prohibit
michael@0 565 const uint32_t *found = nullptr;
michael@0 566 idn_err = idn_nameprep_isprohibited(mNamePrepHandle,
michael@0 567 (const uint32_t *) ucs4Buf, &found);
michael@0 568 if (idn_err != idn_success || found)
michael@0 569 return NS_ERROR_FAILURE;
michael@0 570
michael@0 571 // check bidi
michael@0 572 idn_err = idn_nameprep_isvalidbidi(mNamePrepHandle,
michael@0 573 (const uint32_t *) ucs4Buf, &found);
michael@0 574 if (idn_err != idn_success || found)
michael@0 575 return NS_ERROR_FAILURE;
michael@0 576
michael@0 577 if (!allowUnassigned) {
michael@0 578 // check unassigned code points
michael@0 579 idn_err = idn_nameprep_isunassigned(mNamePrepHandle,
michael@0 580 (const uint32_t *) ucs4Buf, &found);
michael@0 581 if (idn_err != idn_success || found)
michael@0 582 return NS_ERROR_FAILURE;
michael@0 583 }
michael@0 584
michael@0 585 // set the result string
michael@0 586 out.Assign(normlizedStr);
michael@0 587
michael@0 588 return rv;
michael@0 589 }
michael@0 590
michael@0 591 nsresult nsIDNService::encodeToACE(const nsAString& in, nsACString& out)
michael@0 592 {
michael@0 593 // RACE encode is supported for existing testing environment
michael@0 594 if (!strcmp("bq--", mACEPrefix))
michael@0 595 return encodeToRACE(mACEPrefix, in, out);
michael@0 596
michael@0 597 // use punycoce
michael@0 598 return punycode(mACEPrefix, in, out);
michael@0 599 }
michael@0 600
michael@0 601 nsresult nsIDNService::stringPrepAndACE(const nsAString& in, nsACString& out,
michael@0 602 bool allowUnassigned,
michael@0 603 bool convertAllLabels)
michael@0 604 {
michael@0 605 nsresult rv = NS_OK;
michael@0 606
michael@0 607 out.Truncate();
michael@0 608
michael@0 609 if (in.Length() > kMaxDNSNodeLen) {
michael@0 610 NS_WARNING("IDN node too large");
michael@0 611 return NS_ERROR_FAILURE;
michael@0 612 }
michael@0 613
michael@0 614 if (IsASCII(in))
michael@0 615 LossyCopyUTF16toASCII(in, out);
michael@0 616 else if (!convertAllLabels && isLabelSafe(in))
michael@0 617 CopyUTF16toUTF8(in, out);
michael@0 618 else {
michael@0 619 nsAutoString strPrep;
michael@0 620 rv = stringPrep(in, strPrep, allowUnassigned);
michael@0 621 if (NS_SUCCEEDED(rv)) {
michael@0 622 if (IsASCII(strPrep))
michael@0 623 LossyCopyUTF16toASCII(strPrep, out);
michael@0 624 else
michael@0 625 rv = encodeToACE(strPrep, out);
michael@0 626 }
michael@0 627 // Check that the encoded output isn't larger than the maximum length of an
michael@0 628 // DNS node per RFC 1034.
michael@0 629 // This test isn't necessary in the code paths above where the input is
michael@0 630 // ASCII (since the output will be the same length as the input) or where
michael@0 631 // we convert to UTF-8 (since the output is only used for display in the
michael@0 632 // UI and not passed to DNS and can legitimately be longer than the limit).
michael@0 633 if (out.Length() > kMaxDNSNodeLen) {
michael@0 634 NS_WARNING("IDN node too large");
michael@0 635 return NS_ERROR_FAILURE;
michael@0 636 }
michael@0 637 }
michael@0 638
michael@0 639 return rv;
michael@0 640 }
michael@0 641
michael@0 642 // RFC 3490
michael@0 643 // 1) Whenever dots are used as label separators, the following characters
michael@0 644 // MUST be recognized as dots: U+002E (full stop), U+3002 (ideographic full
michael@0 645 // stop), U+FF0E (fullwidth full stop), U+FF61 (halfwidth ideographic full
michael@0 646 // stop).
michael@0 647
michael@0 648 void nsIDNService::normalizeFullStops(nsAString& s)
michael@0 649 {
michael@0 650 nsAString::const_iterator start, end;
michael@0 651 s.BeginReading(start);
michael@0 652 s.EndReading(end);
michael@0 653 int32_t index = 0;
michael@0 654
michael@0 655 while (start != end) {
michael@0 656 switch (*start) {
michael@0 657 case 0x3002:
michael@0 658 case 0xFF0E:
michael@0 659 case 0xFF61:
michael@0 660 s.Replace(index, 1, NS_LITERAL_STRING("."));
michael@0 661 break;
michael@0 662 default:
michael@0 663 break;
michael@0 664 }
michael@0 665 start++;
michael@0 666 index++;
michael@0 667 }
michael@0 668 }
michael@0 669
michael@0 670 nsresult nsIDNService::decodeACE(const nsACString& in, nsACString& out,
michael@0 671 bool allowUnassigned, bool convertAllLabels)
michael@0 672 {
michael@0 673 bool isAce;
michael@0 674 IsACE(in, &isAce);
michael@0 675 if (!isAce) {
michael@0 676 out.Assign(in);
michael@0 677 return NS_OK;
michael@0 678 }
michael@0 679
michael@0 680 // RFC 3490 - 4.2 ToUnicode
michael@0 681 // The ToUnicode output never contains more code points than its input.
michael@0 682 punycode_uint output_length = in.Length() - kACEPrefixLen + 1;
michael@0 683 punycode_uint *output = new punycode_uint[output_length];
michael@0 684 NS_ENSURE_TRUE(output, NS_ERROR_OUT_OF_MEMORY);
michael@0 685
michael@0 686 enum punycode_status status = punycode_decode(in.Length() - kACEPrefixLen,
michael@0 687 PromiseFlatCString(in).get() + kACEPrefixLen,
michael@0 688 &output_length,
michael@0 689 output,
michael@0 690 nullptr);
michael@0 691 if (status != punycode_success) {
michael@0 692 delete [] output;
michael@0 693 return NS_ERROR_FAILURE;
michael@0 694 }
michael@0 695
michael@0 696 // UCS4 -> UTF8
michael@0 697 output[output_length] = 0;
michael@0 698 nsAutoString utf16;
michael@0 699 ucs4toUtf16(output, utf16);
michael@0 700 delete [] output;
michael@0 701 if (!convertAllLabels && !isLabelSafe(utf16)) {
michael@0 702 out.Assign(in);
michael@0 703 return NS_OK;
michael@0 704 }
michael@0 705 if (!isOnlySafeChars(utf16, mIDNBlacklist))
michael@0 706 return NS_ERROR_FAILURE;
michael@0 707 CopyUTF16toUTF8(utf16, out);
michael@0 708
michael@0 709 // Validation: encode back to ACE and compare the strings
michael@0 710 nsAutoCString ace;
michael@0 711 nsresult rv = UTF8toACE(out, ace, allowUnassigned, true);
michael@0 712 NS_ENSURE_SUCCESS(rv, rv);
michael@0 713
michael@0 714 if (!ace.Equals(in, nsCaseInsensitiveCStringComparator()))
michael@0 715 return NS_ERROR_FAILURE;
michael@0 716
michael@0 717 return NS_OK;
michael@0 718 }
michael@0 719
michael@0 720 bool nsIDNService::isInWhitelist(const nsACString &host)
michael@0 721 {
michael@0 722 if (mIDNUseWhitelist && mIDNWhitelistPrefBranch) {
michael@0 723 nsAutoCString tld(host);
michael@0 724 // make sure the host is ACE for lookup and check that there are no
michael@0 725 // unassigned codepoints
michael@0 726 if (!IsASCII(tld) && NS_FAILED(UTF8toACE(tld, tld, false, true))) {
michael@0 727 return false;
michael@0 728 }
michael@0 729
michael@0 730 // truncate trailing dots first
michael@0 731 tld.Trim(".");
michael@0 732 int32_t pos = tld.RFind(".");
michael@0 733 if (pos == kNotFound)
michael@0 734 return false;
michael@0 735
michael@0 736 tld.Cut(0, pos + 1);
michael@0 737
michael@0 738 bool safe;
michael@0 739 if (NS_SUCCEEDED(mIDNWhitelistPrefBranch->GetBoolPref(tld.get(), &safe)))
michael@0 740 return safe;
michael@0 741 }
michael@0 742
michael@0 743 return false;
michael@0 744 }
michael@0 745
michael@0 746 bool nsIDNService::isLabelSafe(const nsAString &label)
michael@0 747 {
michael@0 748 // We should never get here if the label is ASCII
michael@0 749 NS_ASSERTION(!IsASCII(label), "ASCII label in IDN checking");
michael@0 750 if (mRestrictionProfile == eASCIIOnlyProfile) {
michael@0 751 return false;
michael@0 752 }
michael@0 753
michael@0 754 nsAString::const_iterator current, end;
michael@0 755 label.BeginReading(current);
michael@0 756 label.EndReading(end);
michael@0 757
michael@0 758 int32_t lastScript = MOZ_SCRIPT_INVALID;
michael@0 759 uint32_t previousChar = 0;
michael@0 760 uint32_t savedNumberingSystem = 0;
michael@0 761 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
michael@0 762 #if 0
michael@0 763 HanVariantType savedHanVariant = HVT_NotHan;
michael@0 764 #endif
michael@0 765
michael@0 766 int32_t savedScript = -1;
michael@0 767
michael@0 768 while (current != end) {
michael@0 769 uint32_t ch = *current++;
michael@0 770
michael@0 771 if (NS_IS_HIGH_SURROGATE(ch) && current != end &&
michael@0 772 NS_IS_LOW_SURROGATE(*current)) {
michael@0 773 ch = SURROGATE_TO_UCS4(ch, *current++);
michael@0 774 }
michael@0 775
michael@0 776 // Check for restricted characters; aspirational scripts are permitted
michael@0 777 XidmodType xm = GetIdentifierModification(ch);
michael@0 778 int32_t script = GetScriptCode(ch);
michael@0 779 if (xm > XIDMOD_RECOMMENDED &&
michael@0 780 !(xm == XIDMOD_LIMITED_USE &&
michael@0 781 (script == MOZ_SCRIPT_CANADIAN_ABORIGINAL ||
michael@0 782 script == MOZ_SCRIPT_MIAO ||
michael@0 783 script == MOZ_SCRIPT_MONGOLIAN ||
michael@0 784 script == MOZ_SCRIPT_TIFINAGH ||
michael@0 785 script == MOZ_SCRIPT_YI))) {
michael@0 786 return false;
michael@0 787 }
michael@0 788
michael@0 789 // Check for mixed script
michael@0 790 if (script != MOZ_SCRIPT_COMMON &&
michael@0 791 script != MOZ_SCRIPT_INHERITED &&
michael@0 792 script != lastScript) {
michael@0 793 if (illegalScriptCombo(script, savedScript)) {
michael@0 794 return false;
michael@0 795 }
michael@0 796 lastScript = script;
michael@0 797 }
michael@0 798
michael@0 799 // Check for mixed numbering systems
michael@0 800 if (GetGeneralCategory(ch) ==
michael@0 801 HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
michael@0 802 uint32_t zeroCharacter = ch - GetNumericValue(ch);
michael@0 803 if (savedNumberingSystem == 0) {
michael@0 804 // If we encounter a decimal number, save the zero character from that
michael@0 805 // numbering system.
michael@0 806 savedNumberingSystem = zeroCharacter;
michael@0 807 } else if (zeroCharacter != savedNumberingSystem) {
michael@0 808 return false;
michael@0 809 }
michael@0 810 }
michael@0 811
michael@0 812 // Check for consecutive non-spacing marks
michael@0 813 if (previousChar != 0 &&
michael@0 814 previousChar == ch &&
michael@0 815 GetGeneralCategory(ch) == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
michael@0 816 return false;
michael@0 817 }
michael@0 818
michael@0 819 // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
michael@0 820 #if 0
michael@0 821
michael@0 822 // Check for both simplified-only and traditional-only Chinese characters
michael@0 823 HanVariantType hanVariant = GetHanVariant(ch);
michael@0 824 if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
michael@0 825 if (savedHanVariant == HVT_NotHan) {
michael@0 826 savedHanVariant = hanVariant;
michael@0 827 } else if (hanVariant != savedHanVariant) {
michael@0 828 return false;
michael@0 829 }
michael@0 830 }
michael@0 831 #endif
michael@0 832
michael@0 833 previousChar = ch;
michael@0 834 }
michael@0 835 return true;
michael@0 836 }
michael@0 837
michael@0 838 // Scripts that we care about in illegalScriptCombo
michael@0 839 static const int32_t scriptTable[] = {
michael@0 840 MOZ_SCRIPT_BOPOMOFO, MOZ_SCRIPT_CYRILLIC, MOZ_SCRIPT_GREEK,
michael@0 841 MOZ_SCRIPT_HANGUL, MOZ_SCRIPT_HAN, MOZ_SCRIPT_HIRAGANA,
michael@0 842 MOZ_SCRIPT_KATAKANA, MOZ_SCRIPT_LATIN };
michael@0 843
// Single-script indices: the position of each script in scriptTable, used
// both as scriptComboTable columns and as its first rows.
#define BOPO  0
#define CYRL  1
#define GREK  2
#define HANG  3
#define HANI  4
#define HIRA  5
#define KATA  6
#define LATN  7
#define OTHR  8   // what findScriptIndex returns for an unlisted script

// Combined states: these appear only as savedScript values / table rows.
#define JPAN  9   // Latin + Han + Hiragana + Katakana
#define CHNA 10   // Latin + Han + Bopomofo
#define KORE 11   // Latin + Han + Hangul
#define HNLT 12   // Latin + Han (could be any of the above combinations)

// Result marking an illegal combination; scriptComboTable has no such row.
#define FAIL 13
michael@0 858
michael@0 859 static inline int32_t findScriptIndex(int32_t aScript)
michael@0 860 {
michael@0 861 int32_t tableLength = sizeof(scriptTable) / sizeof(int32_t);
michael@0 862 for (int32_t index = 0; index < tableLength; ++index) {
michael@0 863 if (aScript == scriptTable[index]) {
michael@0 864 return index;
michael@0 865 }
michael@0 866 }
michael@0 867 return OTHR;
michael@0 868 }
michael@0 869
michael@0 870 static const int32_t scriptComboTable[13][9] = {
michael@0 871 /* thisScript: BOPO CYRL GREK HANG HANI HIRA KATA LATN OTHR
michael@0 872 * savedScript */
michael@0 873 /* BOPO */ { BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL },
michael@0 874 /* CYRL */ { FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL },
michael@0 875 /* GREK */ { FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL },
michael@0 876 /* HANG */ { FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL },
michael@0 877 /* HANI */ { CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL },
michael@0 878 /* HIRA */ { FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL },
michael@0 879 /* KATA */ { FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL },
michael@0 880 /* LATN */ { CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR },
michael@0 881 /* OTHR */ { FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL },
michael@0 882 /* JPAN */ { FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL },
michael@0 883 /* CHNA */ { CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL },
michael@0 884 /* KORE */ { FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL },
michael@0 885 /* HNLT */ { CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL }
michael@0 886 };
michael@0 887
michael@0 888 bool nsIDNService::illegalScriptCombo(int32_t script, int32_t& savedScript)
michael@0 889 {
michael@0 890 if (savedScript == -1) {
michael@0 891 savedScript = findScriptIndex(script);
michael@0 892 return false;
michael@0 893 }
michael@0 894
michael@0 895 savedScript = scriptComboTable[savedScript] [findScriptIndex(script)];
michael@0 896 /*
michael@0 897 * Special case combinations that depend on which profile is in use
michael@0 898 * In the Highly Restrictive profile Latin is not allowed with any
michael@0 899 * other script
michael@0 900 *
michael@0 901 * In the Moderately Restrictive profile Latin mixed with any other
michael@0 902 * single script is allowed.
michael@0 903 */
michael@0 904 return ((savedScript == OTHR &&
michael@0 905 mRestrictionProfile == eHighlyRestrictiveProfile) ||
michael@0 906 savedScript == FAIL);
michael@0 907 }
michael@0 908
// The script index/state macros are only meant for the combination table
// and its helpers above; remove them again.
#undef BOPO
#undef CYRL
#undef GREK
#undef HANG
#undef HANI
#undef HIRA
#undef KATA
#undef LATN
#undef OTHR
#undef JPAN
#undef CHNA
#undef KORE
#undef HNLT
#undef FAIL

mercurial