netwerk/test/unit/test_idn_urls.js

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/netwerk/test/unit/test_idn_urls.js	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,337 @@
     1.4 +// Test algorithm for unicode display of IDNA URL (bug 722299)
     1.5 +const testcases = [
     1.6 +    //  Unicode              Punycode or         Displayed as Unicode under profile
     1.7 +    //   label              normalized form      ASCII-Only, High, Moderate
     1.8 +    // (run_test() appends ".com" to columns 0-1 and reads columns 2-4 in `profiles` order)
     1.9 +    // Latin script
    1.10 +    ["cuillère", "xn--cuillre-6xa",                  false, true,  true],
    1.11 +
    1.12 +    // repeated non-spacing marks
    1.13 +    ["gruz̀̀ere",  "xn--gruzere-ogea",                 false, false, false],
    1.14 +
    1.15 +    // non-XID character
    1.16 +    ["I♥NY",     "xn--iny-zx5a",                     false, false, false],
    1.17 +
    1.18 +    // new non-XID character in Unicode 6.3
    1.19 +    ["حلا\u061cل", "xn--bgbvr6gc",                    false, false, false],
    1.20 +
    1.21 +    // U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
    1.22 +    ["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup",     false, true,  true],
    1.23 +
    1.24 +    // Cyrillic alone
    1.25 +    ["толсто́й",  "xn--lsa83dealbred",                false, true,  true],
    1.26 +
    1.27 +    // Mixed script Cyrillic/Latin
    1.28 +    ["толсто́й-in-Russian",
    1.29 +                 "xn---in-russian-1jg071b0a8bb4cpd", false, false, false],
    1.30 +
    1.31 +    // Mixed script Latin/Cyrillic
    1.32 +    ["war-and-миръ", "xn--war-and--b9g3b7b3h",       false, false, false],
    1.33 +
    1.34 +    // Cherokee (Restricted script)
    1.35 +    ["ᏣᎳᎩ",     "xn--f9dt7l",                        false, false, false],
    1.36 +
    1.37 +    // Yi (Aspirational script)
    1.38 +    ["ꆈꌠꁱꂷ", "xn--4o7a6e1x64c",                  false, true,  true],
    1.39 +
    1.40 +    // Greek alone
    1.41 +    ["πλάτων",   "xn--hxa3ahjw4a",                   false, true,  true],
    1.42 +
    1.43 +    // Mixed script Greek/Latin
    1.44 +    ["πλάτωνicrelationship",
    1.45 +                 "xn--icrelationship-96j4t9a3cwe2e", false, false, false],
    1.46 +
    1.47 +    // Mixed script Latin/Greek
    1.48 +    ["spaceὈδύσσεια", "xn--space-h9dui0b0ga2j1562b", false, false, false],
    1.49 +
    1.50 +    // Devanagari alone
    1.51 +    ["मराठी",    "xn--d2b1ag0dl",                    false, true,  true],
    1.52 +
    1.53 +    // Devanagari with Armenian
    1.54 +    ["मराठीՀայաստան",
    1.55 +                 "xn--y9aaa1d0ai1cq964f8dwa2o1a",    false, false, false],
    1.56 +
    1.57 +    // Devanagari with common
    1.58 +    ["मराठी123", "xn--123-mhh3em2hra",               false, true,  true],
    1.59 +
    1.60 +    // Common with Devanagari
    1.61 +    ["123मराठी", "xn--123-phh3em2hra",               false, true,  true],
    1.62 +
    1.63 +    // Latin with Han
    1.64 +    ["chairman毛",
    1.65 +                 "xn--chairman-k65r",                false, true,  true],
    1.66 +
    1.67 +    // Han with Latin
    1.68 +    ["山葵sauce", "xn--sauce-6j9ii40v",              false, true,  true],
    1.69 +
    1.70 +    // Latin with Han, Hiragana and Katakana
    1.71 +    ["van語ではドイ", "xn--van-ub4bpb6w0in486d",     false, true,  true],
    1.72 +
    1.73 +    // Latin with Han, Katakana and Hiragana
    1.74 +    ["van語ドイでは", "xn--van-ub4bpb4w0ip486d",     false, true,  true],
    1.75 +
    1.76 +    // Latin with Hiragana, Han and Katakana
    1.77 +    ["vanでは語ドイ", "xn--van-ub4bpb6w0ip486d",     false, true,  true],
    1.78 +
    1.79 +    // Latin with Hiragana, Katakana and Han
    1.80 +    ["vanではドイ語", "xn--van-ub4bpb6w0ir486d",     false, true,  true],
    1.81 +
    1.82 +    // Latin with Katakana, Han and Hiragana
    1.83 +    ["vanドイ語では", "xn--van-ub4bpb4w0ir486d",     false, true,  true],
    1.84 +
    1.85 +    // Latin with Katakana, Hiragana and Han
    1.86 +    ["vanドイでは語", "xn--van-ub4bpb4w0it486d",     false, true,  true],
    1.87 +
    1.88 +    // Han with Latin, Hiragana and Katakana
    1.89 +    ["語vanではドイ", "xn--van-ub4bpb6w0ik486d",     false, true,  true],
    1.90 +
    1.91 +    // Han with Latin, Katakana and Hiragana
    1.92 +    ["語vanドイでは", "xn--van-ub4bpb4w0im486d",     false, true,  true],
    1.93 +
    1.94 +    // Han with Hiragana, Latin and Katakana
    1.95 +    ["語ではvanドイ", "xn--van-rb4bpb9w0ik486d",     false, true,  true],
    1.96 +
    1.97 +    // Han with Hiragana, Katakana and Latin
    1.98 +    ["語ではドイvan", "xn--van-rb4bpb6w0in486d",     false, true,  true],
    1.99 +
   1.100 +    // Han with Katakana, Latin and Hiragana
   1.101 +    ["語ドイvanでは", "xn--van-ub4bpb1w0ip486d",     false, true,  true],
   1.102 +
   1.103 +    // Han with Katakana, Hiragana and Latin
   1.104 +    ["語ドイではvan", "xn--van-rb4bpb4w0ip486d",     false, true,  true],
   1.105 +
   1.106 +    // Hiragana with Latin, Han and Katakana (NOTE(review): this label actually starts with Katakana イツ — confirm intent)
   1.107 +    ["イツvan語ではド", "xn--van-ub4bpb1wvhsbx330n", false, true,  true],
   1.108 +
   1.109 +    // Hiragana with Latin, Katakana and Han
   1.110 +    ["ではvanドイ語", "xn--van-rb4bpb9w0ir486d",     false, true,  true],
   1.111 +
   1.112 +    // Hiragana with Han, Latin and Katakana
   1.113 +    ["では語vanドイ", "xn--van-rb4bpb9w0im486d",     false, true,  true],
   1.114 +
   1.115 +    // Hiragana with Han, Katakana and Latin
   1.116 +    ["では語ドイvan", "xn--van-rb4bpb6w0ip486d",     false, true,  true],
   1.117 +
   1.118 +    // Hiragana with Katakana, Latin and Han
   1.119 +    ["ではドイvan語", "xn--van-rb4bpb6w0iu486d",     false, true,  true],
   1.120 +
   1.121 +    // Hiragana with Katakana, Han and Latin
   1.122 +    ["ではドイ語van", "xn--van-rb4bpb6w0ir486d",     false, true,  true],
   1.123 +
   1.124 +    // Katakana with Latin, Han and Hiragana
   1.125 +    ["ドイvan語では", "xn--van-ub4bpb1w0iu486d",     false, true,  true],
   1.126 +
   1.127 +    // Katakana with Latin, Hiragana and Han
   1.128 +    ["ドイvanでは語", "xn--van-ub4bpb1w0iw486d",     false, true,  true],
   1.129 +
   1.130 +    // Katakana with Han, Latin and Hiragana
   1.131 +    ["ドイ語vanでは", "xn--van-ub4bpb1w0ir486d",     false, true,  true],
   1.132 +
   1.133 +    // Katakana with Han, Hiragana and Latin
   1.134 +    ["ドイ語ではvan", "xn--van-rb4bpb4w0ir486d",     false, true,  true],
   1.135 +
   1.136 +    // Katakana with Hiragana, Latin and Han
   1.137 +    ["ドイではvan語", "xn--van-rb4bpb4w0iw486d",     false, true,  true],
   1.138 +
   1.139 +    // Katakana with Hiragana, Han and Latin
   1.140 +    ["ドイでは語van", "xn--van-rb4bpb4w0it486d",     false, true,  true],
   1.141 +
   1.142 +    // Han with common
   1.143 +    ["中国123",   "xn--123-u68dy61b",                false, true,  true],
   1.144 +
   1.145 +    // common with Han
   1.146 +    ["123中国",   "xn--123-x68dy61b",                false, true,  true],
   1.147 +
   1.148 +    // Characters that normalize to permitted characters
   1.149 +    //  (also tests Plane 1 supplementary characters)
   1.150 +    ["super𝟖",   "super8",                           true,  true,  true],
   1.151 +
   1.152 +    // Han from Plane 2
   1.153 +    ["𠀀𠀁𠀂", "xn--j50icd",                         false, true,  true],
   1.154 +
   1.155 +    // Han from Plane 2 with js (UTF-16) escapes
   1.156 +    ["\uD840\uDC00\uD840\uDC01\uD840\uDC02",
   1.157 +            "xn--j50icd",                            false, true,  true],
   1.158 +
   1.159 +    // Same with a lone high surrogate at the end; the empty expected form makes run_test() expect ".com"
   1.160 +    ["\uD840\uDC00\uD840\uDC01\uD840", "",           false, false, false],
   1.161 +
   1.162 +    // Latin text and Bengali digits
   1.163 +    ["super৪",   "xn--super-k2l",                    false, false, true],
   1.164 +
   1.165 +    // Bengali digits and Latin text
   1.166 +    ["৫ab",   "xn--ab-x5f",                          false, false, true],
   1.167 +
   1.168 +    // Bengali text and Latin digits
   1.169 +    ["অঙ্কুর8",    "xn--8-70d2cp0j6dtd",               false, true,  true],
   1.170 +
   1.171 +    // Latin digits and Bengali text
   1.172 +    ["5াব",        "xn--5-h3d7c",                    false, true,  true],
   1.173 +
   1.174 +    // Mixed numbering systems
   1.175 +    ["٢٠۰٠",     "xn--8hbae38c",                     false, false, false],
   1.176 +
   1.177 +    // Traditional Chinese
   1.178 +    ["萬城",     "xn--uis754h",                      false, true,  true],
   1.179 +
   1.180 +    // Simplified Chinese
   1.181 +    ["万城",     "xn--chq31v",                       false, true,  true],
   1.182 +
   1.183 +    // Simplified-only and Traditional-only Chinese in the same label
   1.184 +    ["万萬城",   "xn--chq31vsl1b",                   false, true,  true],
   1.185 +
   1.186 +    // Traditional-only and Simplified-only Chinese in the same label
   1.187 +    ["萬万城",   "xn--chq31vrl1b",                   false, true,  true],
   1.188 +
   1.189 +    // Han and Latin and Bopomofo
   1.190 +    ["注音符号bopomofoㄅㄆㄇㄈ",
   1.191 +                 "xn--bopomofo-hj5gkalm1637i876cuw0brk5f",
   1.192 +                                                     false, true,  true],
   1.193 +
   1.194 +    // Han, bopomofo, Latin
   1.195 +    ["注音符号ㄅㄆㄇㄈbopomofo",
   1.196 +                 "xn--bopomofo-8i5gkalm9637i876cuw0brk5f",
   1.197 +                                                     false, true,  true],
   1.198 +
   1.199 +    // Latin, Han, Bopomofo
   1.200 +    ["bopomofo注音符号ㄅㄆㄇㄈ",
   1.201 +                 "xn--bopomofo-hj5gkalm9637i876cuw0brk5f",
   1.202 +                                                     false, true,  true],
   1.203 +
   1.204 +    // Latin, Bopomofo, Han
   1.205 +    ["bopomofoㄅㄆㄇㄈ注音符号",
   1.206 +                 "xn--bopomofo-hj5gkalm3737i876cuw0brk5f",
   1.207 +                                                     false, true,  true],
   1.208 +
   1.209 +    // Bopomofo, Han, Latin
   1.210 +    ["ㄅㄆㄇㄈ注音符号bopomofo",
   1.211 +                 "xn--bopomofo-8i5gkalm3737i876cuw0brk5f",
   1.212 +                                                     false, true,  true],
   1.213 +
   1.214 +    // Bopomofo, Latin, Han
   1.215 +    ["ㄅㄆㄇㄈbopomofo注音符号",
   1.216 +                 "xn--bopomofo-8i5gkalm1837i876cuw0brk5f",
   1.217 +                                                     false, true,  true],
   1.218 +
   1.219 +    // Han, bopomofo and katakana
   1.220 +    ["注音符号ㄅㄆㄇㄈボポモフォ",
   1.221 +                 "xn--jckteuaez1shij0450gylvccz9asi4e",
   1.222 +                                                     false, false, false],
   1.223 +
   1.224 +    // Han, katakana, bopomofo
   1.225 +    ["注音符号ボポモフォㄅㄆㄇㄈ",
   1.226 +                 "xn--jckteuaez6shij5350gylvccz9asi4e",
   1.227 +                                                     false, false, false],
   1.228 +
   1.229 +    // bopomofo, han, katakana
   1.230 +    ["ㄅㄆㄇㄈ注音符号ボポモフォ",
   1.231 +                 "xn--jckteuaez1shij4450gylvccz9asi4e",
   1.232 +                                                     false, false, false],
   1.233 +
   1.234 +    // bopomofo, katakana, han
   1.235 +    ["ㄅㄆㄇㄈボポモフォ注音符号",
   1.236 +                 "xn--jckteuaez1shij9450gylvccz9asi4e",
   1.237 +                                                     false, false, false],
   1.238 +
   1.239 +    // katakana, Han, bopomofo
   1.240 +    ["ボポモフォ注音符号ㄅㄆㄇㄈ",
   1.241 +                 "xn--jckteuaez6shij0450gylvccz9asi4e",
   1.242 +                                                     false, false, false],
   1.243 +
   1.244 +    // katakana, bopomofo, Han
   1.245 +    ["ボポモフォㄅㄆㄇㄈ注音符号",
   1.246 +                 "xn--jckteuaez6shij4450gylvccz9asi4e",
   1.247 +                                                     false, false, false],
   1.248 +
   1.249 +    // Han, Hangul and Latin
   1.250 +    ["韓한글hangul",
   1.251 +                 "xn--hangul-2m5ti09k79ze",          false, true,  true],
   1.252 +
   1.253 +    // Han, Latin and Hangul
   1.254 +    ["韓hangul한글",
   1.255 +                 "xn--hangul-2m5to09k79ze",          false, true,  true],
   1.256 +
   1.257 +    // Hangul, Han and Latin
   1.258 +    ["한글韓hangul",
   1.259 +                 "xn--hangul-2m5th09k79ze",          false, true,  true],
   1.260 +
   1.261 +    // Hangul, Latin and Han
   1.262 +    ["한글hangul韓",
   1.263 +                 "xn--hangul-8m5t898k79ze",          false, true,  true],
   1.264 +
   1.265 +    // Latin, Han and Hangul
   1.266 +    ["hangul韓한글",
   1.267 +                 "xn--hangul-8m5ti09k79ze",          false, true,  true],
   1.268 +
   1.269 +    // Latin, Hangul and Han
   1.270 +    ["hangul한글韓",
   1.271 +                 "xn--hangul-8m5th09k79ze",          false, true,  true],
   1.272 +
   1.273 +    // Hangul and katakana
   1.274 +    ["한글ハングル",
   1.275 +                 "xn--qck1c2d4a9266lkmzb",           false, false, false],
   1.276 +
   1.277 +    // Katakana and Hangul
   1.278 +    ["ハングル한글",
   1.279 +                 "xn--qck1c2d4a2366lkmzb",           false, false, false],
   1.280 +
   1.281 +    // Thai (also tests that node with over 63 UTF-8 octets doesn't fail)
   1.282 +    ["เครื่องทําน้ําทําน้ําแข็ง",
   1.283 +                 "xn--22cdjb2fanb9fyepcbbb9dwh4a3igze4fdcd",
   1.284 +                                                     false, true, true]
   1.285 +];
   1.286 +
   1.287 +
   1.288 +const profiles = ["ASCII", "high", "moderate"];  // values run_test() writes to network.IDN.restriction_profile; index i pairs with testcases column 2 + i
   1.289 +
   /**
    * xpcshell entry point: checks nsIIDNService.convertToDisplayIDN against
    * every entry of `testcases` under each restriction profile in `profiles`.
    *
    * For each profile, "network.IDN.restriction_profile" is set to that profile
    * and "network.IDN.whitelist.com" is forced to false (presumably so a .com
    * whitelist entry cannot bypass the profile -- confirm). Both prefs are put
    * back to the values read on entry, even when a check or a conversion throws.
    *
    * Takes no arguments and returns nothing; results are reported through
    * do_check_eq().
    */
   function run_test() {
       var pbi = Cc["@mozilla.org/preferences-service;1"].getService(Ci.nsIPrefBranch);
       var oldProfile = pbi.getCharPref("network.IDN.restriction_profile", "moderate");

       // Declared under the same spelling that is assigned and restored below,
       // so the saved value is a local rather than an implicit global.
       var oldWhitelistCom;
       try {
           oldWhitelistCom = pbi.getBoolPref("network.IDN.whitelist.com");
       } catch (e) {
           // Reading the pref failed; restore it as false afterwards.
           oldWhitelistCom = false;
       }
       var idnService = Cc["@mozilla.org/network/idn-service;1"].getService(Ci.nsIIDNService);

       try {
           for (var i = 0; i < profiles.length; ++i) {
               pbi.setCharPref("network.IDN.restriction_profile", profiles[i]);
               pbi.setBoolPref("network.IDN.whitelist.com", false);

               dump("testing " + profiles[i] + " profile");

               for (var j = 0; j < testcases.length; ++j) {
                   var test = testcases[j];
                   var URL = test[0] + ".com";
                   var punycodeURL = test[1] + ".com";
                   // Columns 2..4 of a testcase line up with `profiles`.
                   var expectedUnicode = test[2 + i];
                   var isASCII = {};

                   var result;
                   try {
                       result = idnService.convertToDisplayIDN(URL, isASCII);
                   } catch (e) {
                       // A throwing conversion is mapped to ".com", which is what a
                       // testcase with an empty expected form compares against.
                       result = ".com";
                   }

                   if (punycodeURL.substr(0, 4) == "xn--") {
                       // test convertToDisplayIDN with a Unicode URL and with a
                       //  Punycode URL if we have one
                       var expected = expectedUnicode ? escape(URL) : escape(punycodeURL);
                       do_check_eq(escape(result), expected);

                       result = idnService.convertToDisplayIDN(punycodeURL, isASCII);
                       do_check_eq(escape(result), expected);
                   } else {
                       // The "punycode" URL isn't punycode. This happens in testcases
                       // where the Unicode URL has become normalized to an ASCII URL,
                       // so, even though expectedUnicode is true, the expected result
                       // is equal to punycodeURL
                       do_check_eq(escape(result), escape(punycodeURL));
                   }
               }
           }
       } finally {
           // Restore on every exit path so a failure cannot leak modified prefs.
           pbi.setBoolPref("network.IDN.whitelist.com", oldWhitelistCom);
           pbi.setCharPref("network.IDN.restriction_profile", oldProfile);
       }
   }

mercurial