netwerk/test/unit/test_idn_urls.js

Thu, 15 Jan 2015 15:55:04 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 15:55:04 +0100
branch
TOR_BUG_9701
changeset 9
a63d609f5ebe
permissions
-rw-r--r--

Back out 97036ab72558 which inappropriately compared turds to third parties.

     1 // Test algorithm for unicode display of IDNA URL (bug 722299)
       //
       // Each row of testcases is consumed by run_test() as:
       //   [0] host label as entered (".com" is appended by run_test)
       //   [1] the Punycode form of the label, or its normalized ASCII form
       //       (".com" is appended by run_test)
       //   [2], [3], [4] whether the label is expected to be displayed as
       //       Unicode under profiles[0], profiles[1] and profiles[2]
       //
       // When [1] does not start with "xn--", run_test expects the display form
       // to equal [1] + ".com" regardless of the booleans. When the conversion
       // throws, run_test substitutes ".com" as the result, which is why a row
       // may carry an empty string in [1].
     2 const testcases = [
     3     //  Original             Punycode or         Expected UTF-8 by profile
     4     //    URL              normalized form      ASCII-Only, High, Moderate
     5     //
     6     // Latin script
     7     ["cuillère", "xn--cuillre-6xa",                  false, true,  true],
     9     // repeated non-spacing marks
    10     ["gruz̀̀ere",  "xn--gruzere-ogea",                 false, false, false],
    12     // non-XID character
    13     ["I♥NY",     "xn--iny-zx5a",                     false, false, false],
    15     // new non-XID character in Unicode 6.3
    16     ["حلا\u061cل", "xn--bgbvr6gc",                    false, false, false],
    18     // U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
    19     ["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup",     false, true,  true],
    21     // Cyrillic alone
    22     ["толсто́й",  "xn--lsa83dealbred",                false, true,  true],
    24     // Mixed script Cyrillic/Latin
    25     ["толсто́й-in-Russian",
    26                  "xn---in-russian-1jg071b0a8bb4cpd", false, false, false],
    28     // Mixed script Latin/Cyrillic
    29     ["war-and-миръ", "xn--war-and--b9g3b7b3h",       false, false, false],
    31     // Cherokee (Restricted script)
    32     ["ᏣᎳᎩ",     "xn--f9dt7l",                        false, false, false],
    34     // Yi (Aspirational script)
    35     ["ꆈꌠꁱꂷ", "xn--4o7a6e1x64c",                  false, true,  true],
    37     // Greek alone
    38     ["πλάτων",   "xn--hxa3ahjw4a",                   false, true,  true],
    40     // Mixed script Greek/Latin
    41     ["πλάτωνicrelationship",
    42                  "xn--icrelationship-96j4t9a3cwe2e", false, false, false],
    44     // Mixed script Latin/Greek
    45     ["spaceὈδύσσεια", "xn--space-h9dui0b0ga2j1562b", false, false, false],
    47     // Devanagari alone
    48     ["मराठी",    "xn--d2b1ag0dl",                    false, true,  true],
    50     // Devanagari with Armenian
    51     ["मराठीՀայաստան",
    52                  "xn--y9aaa1d0ai1cq964f8dwa2o1a",    false, false, false],
    54     // Devanagari with common
    55     ["मराठी123", "xn--123-mhh3em2hra",               false, true,  true],
    57     // Common with Devanagari
    58     ["123मराठी", "xn--123-phh3em2hra",               false, true,  true],
    60     // Latin with Han
    61     ["chairman毛",
    62                  "xn--chairman-k65r",                false, true,  true],
    64     // Han with Latin
    65     ["山葵sauce", "xn--sauce-6j9ii40v",              false, true,  true],
    67     // Latin with Han, Hiragana and Katakana
    68     ["van語ではドイ", "xn--van-ub4bpb6w0in486d",     false, true,  true],
    70     // Latin with Han, Katakana and Hiragana
    71     ["van語ドイでは", "xn--van-ub4bpb4w0ip486d",     false, true,  true],
    73     // Latin with Hiragana, Han and Katakana
    74     ["vanでは語ドイ", "xn--van-ub4bpb6w0ip486d",     false, true,  true],
    76     // Latin with Hiragana, Katakana and Han
    77     ["vanではドイ語", "xn--van-ub4bpb6w0ir486d",     false, true,  true],
    79     // Latin with Katakana, Han and Hiragana
    80     ["vanドイ語では", "xn--van-ub4bpb4w0ir486d",     false, true,  true],
    82     // Latin with Katakana, Hiragana and Han
    83     ["vanドイでは語", "xn--van-ub4bpb4w0it486d",     false, true,  true],
    85     // Han with Latin, Hiragana and Katakana
    86     ["語vanではドイ", "xn--van-ub4bpb6w0ik486d",     false, true,  true],
    88     // Han with Latin, Katakana and Hiragana
    89     ["語vanドイでは", "xn--van-ub4bpb4w0im486d",     false, true,  true],
    91     // Han with Hiragana, Latin and Katakana
    92     ["語ではvanドイ", "xn--van-rb4bpb9w0ik486d",     false, true,  true],
    94     // Han with Hiragana, Katakana and Latin
    95     ["語ではドイvan", "xn--van-rb4bpb6w0in486d",     false, true,  true],
    97     // Han with Katakana, Latin and Hiragana
    98     ["語ドイvanでは", "xn--van-ub4bpb1w0ip486d",     false, true,  true],
   100     // Han with Katakana, Hiragana and Latin
   101     ["語ドイではvan", "xn--van-rb4bpb4w0ip486d",     false, true,  true],
   103     // Hiragana with Latin, Han and Katakana
           // NOTE(review): the label below starts with Katakana (イツ), which does
           // not match the "Hiragana with ..." description above - confirm whether
           // this is intended.
   104     ["イツvan語ではド", "xn--van-ub4bpb1wvhsbx330n", false, true,  true],
   106     // Hiragana with Latin, Katakana and Han
   107     ["ではvanドイ語", "xn--van-rb4bpb9w0ir486d",     false, true,  true],
   109     // Hiragana with Han, Latin and Katakana
   110     ["では語vanドイ", "xn--van-rb4bpb9w0im486d",     false, true,  true],
   112     // Hiragana with Han, Katakana and Latin
   113     ["では語ドイvan", "xn--van-rb4bpb6w0ip486d",     false, true,  true],
   115     // Hiragana with Katakana, Latin and Han
   116     ["ではドイvan語", "xn--van-rb4bpb6w0iu486d",     false, true,  true],
   118     // Hiragana with Katakana, Han and Latin
   119     ["ではドイ語van", "xn--van-rb4bpb6w0ir486d",     false, true,  true],
   121     // Katakana with Latin, Han and Hiragana
   122     ["ドイvan語では", "xn--van-ub4bpb1w0iu486d",     false, true,  true],
   124     // Katakana with Latin, Hiragana and Han
   125     ["ドイvanでは語", "xn--van-ub4bpb1w0iw486d",     false, true,  true],
   127     // Katakana with Han, Latin and Hiragana
   128     ["ドイ語vanでは", "xn--van-ub4bpb1w0ir486d",     false, true,  true],
   130     // Katakana with Han, Hiragana and Latin
   131     ["ドイ語ではvan", "xn--van-rb4bpb4w0ir486d",     false, true,  true],
   133     // Katakana with Hiragana, Latin and Han
   134     ["ドイではvan語", "xn--van-rb4bpb4w0iw486d",     false, true,  true],
   136     // Katakana with Hiragana, Han and Latin
   137     ["ドイでは語van", "xn--van-rb4bpb4w0it486d",     false, true,  true],
   139     // Han with common
   140     ["中国123",   "xn--123-u68dy61b",                false, true,  true],
   142     // common with Han
   143     ["123中国",   "xn--123-x68dy61b",                false, true,  true],
   145     // Characters that normalize to permitted characters
   146     //  (also tests Plane 1 supplementary characters)
   147     ["super𝟖",   "super8",                           true,  true,  true],
   149     // Han from Plane 2
   150     ["𠀀𠀁𠀂", "xn--j50icd",                         false, true,  true],
   152     // Han from Plane 2 with js (UTF-16) escapes
   153     ["\uD840\uDC00\uD840\uDC01\uD840\uDC02",
   154             "xn--j50icd",                            false, true,  true],
   156     // Same with a lone high surrogate at the end
   157     ["\uD840\uDC00\uD840\uDC01\uD840", "",           false, false, false],
   159     // Latin text and Bengali digits
   160     ["super৪",   "xn--super-k2l",                    false, false, true],
   162     // Bengali digits and Latin text
   163     ["৫ab",   "xn--ab-x5f",                          false, false, true],
   165     // Bengali text and Latin digits
   166     ["অঙ্কুর8",    "xn--8-70d2cp0j6dtd",               false, true,  true],
   168     // Latin digits and Bengali text
   169     ["5াব",        "xn--5-h3d7c",                    false, true,  true],
   171     // Mixed numbering systems
   172     ["٢٠۰٠",     "xn--8hbae38c",                     false, false, false],
   174     // Traditional Chinese
   175     ["萬城",     "xn--uis754h",                      false, true,  true],
   177     // Simplified Chinese
   178     ["万城",     "xn--chq31v",                       false, true,  true],
   180     // Simplified-only and Traditional-only Chinese in the same label
   181     ["万萬城",   "xn--chq31vsl1b",                   false, true,  true],
   183     // Traditional-only and Simplified-only Chinese in the same label
   184     ["萬万城",   "xn--chq31vrl1b",                   false, true,  true],
   186     // Han and Latin and Bopomofo
   187     ["注音符号bopomofoㄅㄆㄇㄈ",
   188                  "xn--bopomofo-hj5gkalm1637i876cuw0brk5f",
   189                                                      false, true,  true],
   191     // Han, bopomofo, Latin
   192     ["注音符号ㄅㄆㄇㄈbopomofo",
   193                  "xn--bopomofo-8i5gkalm9637i876cuw0brk5f",
   194                                                      false, true,  true],
   196     // Latin, Han, Bopomofo
   197     ["bopomofo注音符号ㄅㄆㄇㄈ",
   198                  "xn--bopomofo-hj5gkalm9637i876cuw0brk5f",
   199                                                      false, true,  true],
   201     // Latin, Bopomofo, Han
   202     ["bopomofoㄅㄆㄇㄈ注音符号",
   203                  "xn--bopomofo-hj5gkalm3737i876cuw0brk5f",
   204                                                      false, true,  true],
   206     // Bopomofo, Han, Latin
   207     ["ㄅㄆㄇㄈ注音符号bopomofo",
   208                  "xn--bopomofo-8i5gkalm3737i876cuw0brk5f",
   209                                                      false, true,  true],
   211     // Bopomofo, Latin, Han
   212     ["ㄅㄆㄇㄈbopomofo注音符号",
   213                  "xn--bopomofo-8i5gkalm1837i876cuw0brk5f",
   214                                                      false, true,  true],
   216     // Han, bopomofo and katakana
   217     ["注音符号ㄅㄆㄇㄈボポモフォ",
   218                  "xn--jckteuaez1shij0450gylvccz9asi4e",
   219                                                      false, false, false],
   221     // Han, katakana, bopomofo
   222     ["注音符号ボポモフォㄅㄆㄇㄈ",
   223                  "xn--jckteuaez6shij5350gylvccz9asi4e",
   224                                                      false, false, false],
   226     // bopomofo, han, katakana
   227     ["ㄅㄆㄇㄈ注音符号ボポモフォ",
   228                  "xn--jckteuaez1shij4450gylvccz9asi4e",
   229                                                      false, false, false],
   231     // bopomofo, katakana, han
   232     ["ㄅㄆㄇㄈボポモフォ注音符号",
   233                  "xn--jckteuaez1shij9450gylvccz9asi4e",
   234                                                      false, false, false],
   236     // katakana, Han, bopomofo
   237     ["ボポモフォ注音符号ㄅㄆㄇㄈ",
   238                  "xn--jckteuaez6shij0450gylvccz9asi4e",
   239                                                      false, false, false],
   241     // katakana, bopomofo, Han
   242     ["ボポモフォㄅㄆㄇㄈ注音符号",
   243                  "xn--jckteuaez6shij4450gylvccz9asi4e",
   244                                                      false, false, false],
   246     // Han, Hangul and Latin
   247     ["韓한글hangul",
   248                  "xn--hangul-2m5ti09k79ze",          false, true,  true],
   250     // Han, Latin and Hangul
   251     ["韓hangul한글",
   252                  "xn--hangul-2m5to09k79ze",          false, true,  true],
   254     // Hangul, Han and Latin
   255     ["한글韓hangul",
   256                  "xn--hangul-2m5th09k79ze",          false, true,  true],
   258     // Hangul, Latin and Han
   259     ["한글hangul韓",
   260                  "xn--hangul-8m5t898k79ze",          false, true,  true],
   262     // Latin, Han and Hangul
   263     ["hangul韓한글",
   264                  "xn--hangul-8m5ti09k79ze",          false, true,  true],
   266     // Latin, Hangul and Han
   267     ["hangul한글韓",
   268                  "xn--hangul-8m5th09k79ze",          false, true,  true],
   270     // Hangul and katakana
   271     ["한글ハングル",
   272                  "xn--qck1c2d4a9266lkmzb",           false, false, false],
   274     // Katakana and Hangul
   275     ["ハングル한글",
   276                  "xn--qck1c2d4a2366lkmzb",           false, false, false],
   278     // Thai (also tests that node with over 63 UTF-8 octets doesn't fail)
   279     ["เครื่องทําน้ําทําน้ําแข็ง",
   280                  "xn--22cdjb2fanb9fyepcbbb9dwh4a3igze4fdcd",
   281                                                      false, true, true]
   282 ];
       // Values that run_test() writes, one at a time, to the
       // "network.IDN.restriction_profile" pref. profiles[i] pairs with column
       // 2 + i of each testcases row.
   285 const profiles = ["ASCII", "high", "moderate"];
   287 function run_test() {
   288     var pbi = Cc["@mozilla.org/preferences-service;1"].getService(Ci.nsIPrefBranch);
   289     var oldProfile = pbi.getCharPref("network.IDN.restriction_profile", "moderate");
   290     var oldWhiteListCom;
   291     try {
   292         oldWhitelistCom = pbi.getBoolPref("network.IDN.whitelist.com");
   293     } catch(e) {
   294         oldWhitelistCom = false;
   295     }
   296     var idnService = Cc["@mozilla.org/network/idn-service;1"].getService(Ci.nsIIDNService);
   298     for (var i = 0; i < profiles.length; ++i) {
   299         pbi.setCharPref("network.IDN.restriction_profile", profiles[i]);
   300         pbi.setBoolPref("network.IDN.whitelist.com", false);
   302         dump("testing " + profiles[i] + " profile");
   304         for (var j = 0; j < testcases.length; ++j) {
   305             var test = testcases[j];
   306             var URL = test[0] + ".com";
   307             var punycodeURL = test[1] + ".com";
   308             var expectedUnicode = test[2 + i];
   309             var isASCII = {};
   311 	    var result;
   312 	    try {
   313 		result = idnService.convertToDisplayIDN(URL, isASCII);
   314 	    } catch(e) {
   315 		result = ".com";
   316 	    }
   317             if (punycodeURL.substr(0, 4) == "xn--") {
   318                 // test convertToDisplayIDN with a Unicode URL and with a
   319                 //  Punycode URL if we have one
   320                 do_check_eq(escape(result),
   321                             expectedUnicode ? escape(URL) : escape(punycodeURL));
   323                 result = idnService.convertToDisplayIDN(punycodeURL, isASCII);
   324                 do_check_eq(escape(result),
   325                             expectedUnicode ? escape(URL) : escape(punycodeURL));
   326             } else {
   327                 // The "punycode" URL isn't punycode. This happens in testcases
   328                 // where the Unicode URL has become normalized to an ASCII URL,
   329                 // so, even though expectedUnicode is true, the expected result
   330                 // is equal to punycodeURL
   331                 do_check_eq(escape(result), escape(punycodeURL));
   332             }
   333         }
   334     }
   335     pbi.setBoolPref("network.IDN.whitelist.com", oldWhitelistCom);
   336     pbi.setCharPref("network.IDN.restriction_profile", oldProfile);
   337 }

mercurial