// Test algorithm for unicode display of IDNA URL (bug 722299)

// Each entry is:
//   [0] the original (Unicode) label,
//   [1] its Punycode form, or the normalized ASCII form when the label
//       normalizes to plain ASCII (empty when no form exists),
//   [2..4] whether the label is expected to be displayed as Unicode under
//       each entry of `profiles`, in the same order.
const testcases = [
  //  Original             Punycode or         Expected UTF-8 by profile
  //    URL              normalized form      ASCII-Only, High, Moderate
  //
  // Latin script
  ["cuillère", "xn--cuillre-6xa", false, true, true],

  // repeated non-spacing marks
  ["gruz̀̀ere", "xn--gruzere-ogea", false, false, false],

  // non-XID character
  ["I♥NY", "xn--iny-zx5a", false, false, false],

  // new non-XID character in Unicode 6.3
  ["حلا\u061cل", "xn--bgbvr6gc", false, false, false],

  // U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
  ["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup", false, true, true],

  // Cyrillic alone
  ["толсто́й", "xn--lsa83dealbred", false, true, true],

  // Mixed script Cyrillic/Latin
  ["толсто́й-in-Russian",
   "xn---in-russian-1jg071b0a8bb4cpd", false, false, false],

  // Mixed script Latin/Cyrillic
  ["war-and-миръ", "xn--war-and--b9g3b7b3h", false, false, false],

  // Cherokee (Restricted script)
  ["ᏣᎳᎩ", "xn--f9dt7l", false, false, false],

  // Yi (Aspirational script)
  ["ꆈꌠꁱꂷ", "xn--4o7a6e1x64c", false, true, true],

  // Greek alone
  ["πλάτων", "xn--hxa3ahjw4a", false, true, true],

  // Mixed script Greek/Latin
  ["πλάτωνicrelationship",
   "xn--icrelationship-96j4t9a3cwe2e", false, false, false],

  // Mixed script Latin/Greek
  ["spaceὈδύσσεια", "xn--space-h9dui0b0ga2j1562b", false, false, false],

  // Devanagari alone
  ["मराठी", "xn--d2b1ag0dl", false, true, true],

  // Devanagari with Armenian
  ["मराठीՀայաստան",
   "xn--y9aaa1d0ai1cq964f8dwa2o1a", false, false, false],

  // Devanagari with common
  ["मराठी123", "xn--123-mhh3em2hra", false, true, true],

  // Common with Devanagari
  ["123मराठी", "xn--123-phh3em2hra", false, true, true],

  // Latin with Han
  ["chairman毛",
   "xn--chairman-k65r", false, true, true],

  // Han with Latin
  ["山葵sauce", "xn--sauce-6j9ii40v", false, true, true],

  // Latin with Han, Hiragana and Katakana
  ["van語ではドイ", "xn--van-ub4bpb6w0in486d", false, true, true],

  // Latin with Han, Katakana and Hiragana
  ["van語ドイでは", "xn--van-ub4bpb4w0ip486d", false, true, true],

  // Latin with Hiragana, Han and Katakana
  ["vanでは語ドイ", "xn--van-ub4bpb6w0ip486d", false, true, true],

  // Latin with Hiragana, Katakana and Han
  ["vanではドイ語", "xn--van-ub4bpb6w0ir486d", false, true, true],

  // Latin with Katakana, Han and Hiragana
  ["vanドイ語では", "xn--van-ub4bpb4w0ir486d", false, true, true],

  // Latin with Katakana, Hiragana and Han
  ["vanドイでは語", "xn--van-ub4bpb4w0it486d", false, true, true],

  // Han with Latin, Hiragana and Katakana
  ["語vanではドイ", "xn--van-ub4bpb6w0ik486d", false, true, true],

  // Han with Latin, Katakana and Hiragana
  ["語vanドイでは", "xn--van-ub4bpb4w0im486d", false, true, true],

  // Han with Hiragana, Latin and Katakana
  ["語ではvanドイ", "xn--van-rb4bpb9w0ik486d", false, true, true],

  // Han with Hiragana, Katakana and Latin
  ["語ではドイvan", "xn--van-rb4bpb6w0in486d", false, true, true],

  // Han with Katakana, Latin and Hiragana
  ["語ドイvanでは", "xn--van-ub4bpb1w0ip486d", false, true, true],

  // Han with Katakana, Hiragana and Latin
  ["語ドイではvan", "xn--van-rb4bpb4w0ip486d", false, true, true],

  // Hiragana with Latin, Han and Katakana
  ["イツvan語ではド", "xn--van-ub4bpb1wvhsbx330n", false, true, true],

  // Hiragana with Latin, Katakana and Han
  ["ではvanドイ語", "xn--van-rb4bpb9w0ir486d", false, true, true],

  // Hiragana with Han, Latin and Katakana
  ["では語vanドイ", "xn--van-rb4bpb9w0im486d", false, true, true],

  // Hiragana with Han, Katakana and Latin
  ["では語ドイvan", "xn--van-rb4bpb6w0ip486d", false, true, true],

  // Hiragana with Katakana, Latin and Han
  ["ではドイvan語", "xn--van-rb4bpb6w0iu486d", false, true, true],

  // Hiragana with Katakana, Han and Latin
  ["ではドイ語van", "xn--van-rb4bpb6w0ir486d", false, true, true],

  // Katakana with Latin, Han and Hiragana
  ["ドイvan語では", "xn--van-ub4bpb1w0iu486d", false, true, true],

  // Katakana with Latin, Hiragana and Han
  ["ドイvanでは語", "xn--van-ub4bpb1w0iw486d", false, true, true],

  // Katakana with Han, Latin and Hiragana
  ["ドイ語vanでは", "xn--van-ub4bpb1w0ir486d", false, true, true],

  // Katakana with Han, Hiragana and Latin
  ["ドイ語ではvan", "xn--van-rb4bpb4w0ir486d", false, true, true],

  // Katakana with Hiragana, Latin and Han
  ["ドイではvan語", "xn--van-rb4bpb4w0iw486d", false, true, true],

  // Katakana with Hiragana, Han and Latin
  ["ドイでは語van", "xn--van-rb4bpb4w0it486d", false, true, true],

  // Han with common
  ["中国123", "xn--123-u68dy61b", false, true, true],

  // common with Han
  ["123中国", "xn--123-x68dy61b", false, true, true],

  // Characters that normalize to permitted characters
  //  (also tests Plane 1 supplementary characters)
  ["super𝟖", "super8", true, true, true],

  // Han from Plane 2
  ["𠀀𠀁𠀂", "xn--j50icd", false, true, true],

  // Han from Plane 2 with js (UTF-16) escapes
  ["\uD840\uDC00\uD840\uDC01\uD840\uDC02",
   "xn--j50icd", false, true, true],

  // Same with a lone high surrogate at the end
  ["\uD840\uDC00\uD840\uDC01\uD840", "", false, false, false],

  // Latin text and Bengali digits
  ["super৪", "xn--super-k2l", false, false, true],

  // Bengali digits and Latin text
  ["৫ab", "xn--ab-x5f", false, false, true],

  // Bengali text and Latin digits
  ["অঙ্কুর8", "xn--8-70d2cp0j6dtd", false, true, true],

  // Latin digits and Bengali text
  ["5াব", "xn--5-h3d7c", false, true, true],

  // Mixed numbering systems
  ["٢٠۰٠", "xn--8hbae38c", false, false, false],

  // Traditional Chinese
  ["萬城", "xn--uis754h", false, true, true],

  // Simplified Chinese
  ["万城", "xn--chq31v", false, true, true],

  // Simplified-only and Traditional-only Chinese in the same label
  ["万萬城", "xn--chq31vsl1b", false, true, true],

  // Traditional-only and Simplified-only Chinese in the same label
  ["萬万城", "xn--chq31vrl1b", false, true, true],

  // Han and Latin and Bopomofo
  ["注音符号bopomofoㄅㄆㄇㄈ",
   "xn--bopomofo-hj5gkalm1637i876cuw0brk5f",
   false, true, true],

  // Han, bopomofo, Latin
  ["注音符号ㄅㄆㄇㄈbopomofo",
   "xn--bopomofo-8i5gkalm9637i876cuw0brk5f",
   false, true, true],

  // Latin, Han, Bopomofo
  ["bopomofo注音符号ㄅㄆㄇㄈ",
   "xn--bopomofo-hj5gkalm9637i876cuw0brk5f",
   false, true, true],

  // Latin, Bopomofo, Han
  ["bopomofoㄅㄆㄇㄈ注音符号",
   "xn--bopomofo-hj5gkalm3737i876cuw0brk5f",
   false, true, true],

  // Bopomofo, Han, Latin
  ["ㄅㄆㄇㄈ注音符号bopomofo",
   "xn--bopomofo-8i5gkalm3737i876cuw0brk5f",
   false, true, true],

  // Bopomofo, Latin, Han
  ["ㄅㄆㄇㄈbopomofo注音符号",
   "xn--bopomofo-8i5gkalm1837i876cuw0brk5f",
   false, true, true],

  // Han, bopomofo and katakana
  ["注音符号ㄅㄆㄇㄈボポモフォ",
   "xn--jckteuaez1shij0450gylvccz9asi4e",
   false, false, false],

  // Han, katakana, bopomofo
  ["注音符号ボポモフォㄅㄆㄇㄈ",
   "xn--jckteuaez6shij5350gylvccz9asi4e",
   false, false, false],

  // bopomofo, han, katakana
  ["ㄅㄆㄇㄈ注音符号ボポモフォ",
   "xn--jckteuaez1shij4450gylvccz9asi4e",
   false, false, false],

  // bopomofo, katakana, han
  ["ㄅㄆㄇㄈボポモフォ注音符号",
   "xn--jckteuaez1shij9450gylvccz9asi4e",
   false, false, false],

  // katakana, Han, bopomofo
  ["ボポモフォ注音符号ㄅㄆㄇㄈ",
   "xn--jckteuaez6shij0450gylvccz9asi4e",
   false, false, false],

  // katakana, bopomofo, Han
  ["ボポモフォㄅㄆㄇㄈ注音符号",
   "xn--jckteuaez6shij4450gylvccz9asi4e",
   false, false, false],

  // Han, Hangul and Latin
  ["韓한글hangul",
   "xn--hangul-2m5ti09k79ze", false, true, true],

  // Han, Latin and Hangul
  ["韓hangul한글",
   "xn--hangul-2m5to09k79ze", false, true, true],

  // Hangul, Han and Latin
  ["한글韓hangul",
   "xn--hangul-2m5th09k79ze", false, true, true],

  // Hangul, Latin and Han
  ["한글hangul韓",
   "xn--hangul-8m5t898k79ze", false, true, true],

  // Latin, Han and Hangul
  ["hangul韓한글",
   "xn--hangul-8m5ti09k79ze", false, true, true],

  // Latin, Hangul and Han
  ["hangul한글韓",
   "xn--hangul-8m5th09k79ze", false, true, true],

  // Hangul and katakana
  ["한글ハングル",
   "xn--qck1c2d4a9266lkmzb", false, false, false],

  // Katakana and Hangul
  ["ハングル한글",
   "xn--qck1c2d4a2366lkmzb", false, false, false],

  // Thai (also tests that node with over 63 UTF-8 octets doesn't fail)
  ["เครื่องทําน้ําทําน้ําแข็ง",
   "xn--22cdjb2fanb9fyepcbbb9dwh4a3igze4fdcd",
   false, true, true]
];


// Values written to "network.IDN.restriction_profile", one full pass each.
// Index i here selects column 2 + i of every testcases entry.
const profiles = ["ASCII", "high", "moderate"];

/**
 * Test entry point: for every restriction profile, converts each test label
 * (with ".com" appended) through nsIIDNService.convertToDisplayIDN and
 * compares the result with the form expected for that profile.
 *
 * The "network.IDN.restriction_profile" and "network.IDN.whitelist.com"
 * preferences are modified while the checks run; their previous values are
 * written back in a `finally` block so they are restored even when a check
 * or a conversion throws.
 */
function run_test() {
  var pbi = Cc["@mozilla.org/preferences-service;1"].getService(Ci.nsIPrefBranch);
  var oldProfile = pbi.getCharPref("network.IDN.restriction_profile", "moderate");
  var oldWhitelistCom;
  try {
    oldWhitelistCom = pbi.getBoolPref("network.IDN.whitelist.com");
  } catch(e) {
    // The read failed (e.g. the pref has no value); fall back to false so
    // there is always a value to write back afterwards.
    oldWhitelistCom = false;
  }
  var idnService = Cc["@mozilla.org/network/idn-service;1"].getService(Ci.nsIIDNService);

  try {
    for (var i = 0; i < profiles.length; ++i) {
      pbi.setCharPref("network.IDN.restriction_profile", profiles[i]);
      pbi.setBoolPref("network.IDN.whitelist.com", false);

      dump("testing " + profiles[i] + " profile");

      for (var j = 0; j < testcases.length; ++j) {
        var test = testcases[j];
        var URL = test[0] + ".com";
        var punycodeURL = test[1] + ".com";
        var expectedUnicode = test[2 + i];
        // Passed to convertToDisplayIDN; its contents are never read here.
        var isASCII = {};

        var result;
        try {
          result = idnService.convertToDisplayIDN(URL, isASCII);
        } catch(e) {
          // A failed conversion is compared as a bare ".com", which is what
          // an entry with an empty second column expects.
          result = ".com";
        }
        if (punycodeURL.slice(0, 4) === "xn--") {
          // test convertToDisplayIDN with a Unicode URL and with a
          //   Punycode URL if we have one
          var expected = expectedUnicode ? escape(URL) : escape(punycodeURL);
          do_check_eq(escape(result), expected);

          result = idnService.convertToDisplayIDN(punycodeURL, isASCII);
          do_check_eq(escape(result), expected);
        } else {
          // The "punycode" URL isn't punycode. This happens in testcases
          // where the Unicode URL has become normalized to an ASCII URL,
          // so, even though expectedUnicode is true, the expected result
          // is equal to punycodeURL
          do_check_eq(escape(result), escape(punycodeURL));
        }
      }
    }
  } finally {
    pbi.setBoolPref("network.IDN.whitelist.com", oldWhitelistCom);
    pbi.setCharPref("network.IDN.restriction_profile", oldProfile);
  }
}