netwerk/test/unit/test_idn_urls.js

Thu, 15 Jan 2015 21:03:48 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 15 Jan 2015 21:03:48 +0100
branch
TOR_BUG_9701
changeset 11
deefc01c0e14
permissions
-rw-r--r--

Integrate friendly tips from Tor colleagues to make (or not) 4.5 alpha 3;
this includes removal of overloaded (but unused) methods, and addition of
an overlooked call to DataStruct::SetData(nsISupports, uint32_t, bool).

michael@0 1 // Test algorithm for unicode display of IDNA URL (bug 722299)
michael@0 2 const testcases = [
michael@0 3 // Each entry: [original URL label, Punycode or normalized form,
michael@0 4 // expect-Unicode display under the ASCII-only, high, moderate profiles]
michael@0 5 // (run_test appends ".com" to both labels before converting)
michael@0 6 // Latin script
michael@0 7 ["cuillère", "xn--cuillre-6xa", false, true, true],
michael@0 8
michael@0 9 // repeated non-spacing marks
michael@0 10 ["gruz̀̀ere", "xn--gruzere-ogea", false, false, false],
michael@0 11
michael@0 12 // non-XID character
michael@0 13 ["I♥NY", "xn--iny-zx5a", false, false, false],
michael@0 14
michael@0 15 // new non-XID character in Unicode 6.3
michael@0 16 ["حلا\u061cل", "xn--bgbvr6gc", false, false, false],
michael@0 17
michael@0 18 // U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
michael@0 19 ["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup", false, true, true],
michael@0 20
michael@0 21 // Cyrillic alone
michael@0 22 ["толсто́й", "xn--lsa83dealbred", false, true, true],
michael@0 23
michael@0 24 // Mixed script Cyrillic/Latin
michael@0 25 ["толсто́й-in-Russian",
michael@0 26 "xn---in-russian-1jg071b0a8bb4cpd", false, false, false],
michael@0 27
michael@0 28 // Mixed script Latin/Cyrillic
michael@0 29 ["war-and-миръ", "xn--war-and--b9g3b7b3h", false, false, false],
michael@0 30
michael@0 31 // Cherokee (Restricted script)
michael@0 32 ["ᏣᎳᎩ", "xn--f9dt7l", false, false, false],
michael@0 33
michael@0 34 // Yi (Aspirational script)
michael@0 35 ["ꆈꌠꁱꂷ", "xn--4o7a6e1x64c", false, true, true],
michael@0 36
michael@0 37 // Greek alone
michael@0 38 ["πλάτων", "xn--hxa3ahjw4a", false, true, true],
michael@0 39
michael@0 40 // Mixed script Greek/Latin
michael@0 41 ["πλάτωνicrelationship",
michael@0 42 "xn--icrelationship-96j4t9a3cwe2e", false, false, false],
michael@0 43
michael@0 44 // Mixed script Latin/Greek
michael@0 45 ["spaceὈδύσσεια", "xn--space-h9dui0b0ga2j1562b", false, false, false],
michael@0 46
michael@0 47 // Devanagari alone
michael@0 48 ["मराठी", "xn--d2b1ag0dl", false, true, true],
michael@0 49
michael@0 50 // Devanagari with Armenian
michael@0 51 ["मराठीՀայաստան",
michael@0 52 "xn--y9aaa1d0ai1cq964f8dwa2o1a", false, false, false],
michael@0 53
michael@0 54 // Devanagari with common
michael@0 55 ["मराठी123", "xn--123-mhh3em2hra", false, true, true],
michael@0 56
michael@0 57 // Common with Devanagari
michael@0 58 ["123मराठी", "xn--123-phh3em2hra", false, true, true],
michael@0 59
michael@0 60 // Latin with Han
michael@0 61 ["chairman毛",
michael@0 62 "xn--chairman-k65r", false, true, true],
michael@0 63
michael@0 64 // Han with Latin
michael@0 65 ["山葵sauce", "xn--sauce-6j9ii40v", false, true, true],
michael@0 66
michael@0 67 // Latin with Han, Hiragana and Katakana
michael@0 68 ["van語ではドイ", "xn--van-ub4bpb6w0in486d", false, true, true],
michael@0 69
michael@0 70 // Latin with Han, Katakana and Hiragana
michael@0 71 ["van語ドイでは", "xn--van-ub4bpb4w0ip486d", false, true, true],
michael@0 72
michael@0 73 // Latin with Hiragana, Han and Katakana
michael@0 74 ["vanでは語ドイ", "xn--van-ub4bpb6w0ip486d", false, true, true],
michael@0 75
michael@0 76 // Latin with Hiragana, Katakana and Han
michael@0 77 ["vanではドイ語", "xn--van-ub4bpb6w0ir486d", false, true, true],
michael@0 78
michael@0 79 // Latin with Katakana, Han and Hiragana
michael@0 80 ["vanドイ語では", "xn--van-ub4bpb4w0ir486d", false, true, true],
michael@0 81
michael@0 82 // Latin with Katakana, Hiragana and Han
michael@0 83 ["vanドイでは語", "xn--van-ub4bpb4w0it486d", false, true, true],
michael@0 84
michael@0 85 // Han with Latin, Hiragana and Katakana
michael@0 86 ["語vanではドイ", "xn--van-ub4bpb6w0ik486d", false, true, true],
michael@0 87
michael@0 88 // Han with Latin, Katakana and Hiragana
michael@0 89 ["語vanドイでは", "xn--van-ub4bpb4w0im486d", false, true, true],
michael@0 90
michael@0 91 // Han with Hiragana, Latin and Katakana
michael@0 92 ["語ではvanドイ", "xn--van-rb4bpb9w0ik486d", false, true, true],
michael@0 93
michael@0 94 // Han with Hiragana, Katakana and Latin
michael@0 95 ["語ではドイvan", "xn--van-rb4bpb6w0in486d", false, true, true],
michael@0 96
michael@0 97 // Han with Katakana, Latin and Hiragana
michael@0 98 ["語ドイvanでは", "xn--van-ub4bpb1w0ip486d", false, true, true],
michael@0 99
michael@0 100 // Han with Katakana, Hiragana and Latin
michael@0 101 ["語ドイではvan", "xn--van-rb4bpb4w0ip486d", false, true, true],
michael@0 102
michael@0 103 // Hiragana with Latin, Han and Katakana (NOTE(review): the label below begins with Katakana, not Hiragana - confirm intended)
michael@0 104 ["イツvan語ではド", "xn--van-ub4bpb1wvhsbx330n", false, true, true],
michael@0 105
michael@0 106 // Hiragana with Latin, Katakana and Han
michael@0 107 ["ではvanドイ語", "xn--van-rb4bpb9w0ir486d", false, true, true],
michael@0 108
michael@0 109 // Hiragana with Han, Latin and Katakana
michael@0 110 ["では語vanドイ", "xn--van-rb4bpb9w0im486d", false, true, true],
michael@0 111
michael@0 112 // Hiragana with Han, Katakana and Latin
michael@0 113 ["では語ドイvan", "xn--van-rb4bpb6w0ip486d", false, true, true],
michael@0 114
michael@0 115 // Hiragana with Katakana, Latin and Han
michael@0 116 ["ではドイvan語", "xn--van-rb4bpb6w0iu486d", false, true, true],
michael@0 117
michael@0 118 // Hiragana with Katakana, Han and Latin
michael@0 119 ["ではドイ語van", "xn--van-rb4bpb6w0ir486d", false, true, true],
michael@0 120
michael@0 121 // Katakana with Latin, Han and Hiragana
michael@0 122 ["ドイvan語では", "xn--van-ub4bpb1w0iu486d", false, true, true],
michael@0 123
michael@0 124 // Katakana with Latin, Hiragana and Han
michael@0 125 ["ドイvanでは語", "xn--van-ub4bpb1w0iw486d", false, true, true],
michael@0 126
michael@0 127 // Katakana with Han, Latin and Hiragana
michael@0 128 ["ドイ語vanでは", "xn--van-ub4bpb1w0ir486d", false, true, true],
michael@0 129
michael@0 130 // Katakana with Han, Hiragana and Latin
michael@0 131 ["ドイ語ではvan", "xn--van-rb4bpb4w0ir486d", false, true, true],
michael@0 132
michael@0 133 // Katakana with Hiragana, Latin and Han
michael@0 134 ["ドイではvan語", "xn--van-rb4bpb4w0iw486d", false, true, true],
michael@0 135
michael@0 136 // Katakana with Hiragana, Han and Latin
michael@0 137 ["ドイでは語van", "xn--van-rb4bpb4w0it486d", false, true, true],
michael@0 138
michael@0 139 // Han with common
michael@0 140 ["中国123", "xn--123-u68dy61b", false, true, true],
michael@0 141
michael@0 142 // common with Han
michael@0 143 ["123中国", "xn--123-x68dy61b", false, true, true],
michael@0 144
michael@0 145 // Characters that normalize to permitted characters
michael@0 146 // (also tests Plane 1 supplementary characters)
michael@0 147 ["super𝟖", "super8", true, true, true],
michael@0 148
michael@0 149 // Han from Plane 2
michael@0 150 ["𠀀𠀁𠀂", "xn--j50icd", false, true, true],
michael@0 151
michael@0 152 // Han from Plane 2 with js (UTF-16) escapes
michael@0 153 ["\uD840\uDC00\uD840\uDC01\uD840\uDC02",
michael@0 154 "xn--j50icd", false, true, true],
michael@0 155
michael@0 156 // Same with a lone high surrogate at the end
michael@0 157 ["\uD840\uDC00\uD840\uDC01\uD840", "", false, false, false],
michael@0 158
michael@0 159 // Latin text and Bengali digits
michael@0 160 ["super৪", "xn--super-k2l", false, false, true],
michael@0 161
michael@0 162 // Bengali digits and Latin text
michael@0 163 ["৫ab", "xn--ab-x5f", false, false, true],
michael@0 164
michael@0 165 // Bengali text and Latin digits
michael@0 166 ["অঙ্কুর8", "xn--8-70d2cp0j6dtd", false, true, true],
michael@0 167
michael@0 168 // Latin digits and Bengali text
michael@0 169 ["5াব", "xn--5-h3d7c", false, true, true],
michael@0 170
michael@0 171 // Mixed numbering systems
michael@0 172 ["٢٠۰٠", "xn--8hbae38c", false, false, false],
michael@0 173
michael@0 174 // Traditional Chinese
michael@0 175 ["萬城", "xn--uis754h", false, true, true],
michael@0 176
michael@0 177 // Simplified Chinese
michael@0 178 ["万城", "xn--chq31v", false, true, true],
michael@0 179
michael@0 180 // Simplified-only and Traditional-only Chinese in the same label
michael@0 181 ["万萬城", "xn--chq31vsl1b", false, true, true],
michael@0 182
michael@0 183 // Traditional-only and Simplified-only Chinese in the same label
michael@0 184 ["萬万城", "xn--chq31vrl1b", false, true, true],
michael@0 185
michael@0 186 // Han and Latin and Bopomofo
michael@0 187 ["注音符号bopomofoㄅㄆㄇㄈ",
michael@0 188 "xn--bopomofo-hj5gkalm1637i876cuw0brk5f",
michael@0 189 false, true, true],
michael@0 190
michael@0 191 // Han, bopomofo, Latin
michael@0 192 ["注音符号ㄅㄆㄇㄈbopomofo",
michael@0 193 "xn--bopomofo-8i5gkalm9637i876cuw0brk5f",
michael@0 194 false, true, true],
michael@0 195
michael@0 196 // Latin, Han, Bopomofo
michael@0 197 ["bopomofo注音符号ㄅㄆㄇㄈ",
michael@0 198 "xn--bopomofo-hj5gkalm9637i876cuw0brk5f",
michael@0 199 false, true, true],
michael@0 200
michael@0 201 // Latin, Bopomofo, Han
michael@0 202 ["bopomofoㄅㄆㄇㄈ注音符号",
michael@0 203 "xn--bopomofo-hj5gkalm3737i876cuw0brk5f",
michael@0 204 false, true, true],
michael@0 205
michael@0 206 // Bopomofo, Han, Latin
michael@0 207 ["ㄅㄆㄇㄈ注音符号bopomofo",
michael@0 208 "xn--bopomofo-8i5gkalm3737i876cuw0brk5f",
michael@0 209 false, true, true],
michael@0 210
michael@0 211 // Bopomofo, Latin, Han
michael@0 212 ["ㄅㄆㄇㄈbopomofo注音符号",
michael@0 213 "xn--bopomofo-8i5gkalm1837i876cuw0brk5f",
michael@0 214 false, true, true],
michael@0 215
michael@0 216 // Han, bopomofo and katakana
michael@0 217 ["注音符号ㄅㄆㄇㄈボポモフォ",
michael@0 218 "xn--jckteuaez1shij0450gylvccz9asi4e",
michael@0 219 false, false, false],
michael@0 220
michael@0 221 // Han, katakana, bopomofo
michael@0 222 ["注音符号ボポモフォㄅㄆㄇㄈ",
michael@0 223 "xn--jckteuaez6shij5350gylvccz9asi4e",
michael@0 224 false, false, false],
michael@0 225
michael@0 226 // bopomofo, han, katakana
michael@0 227 ["ㄅㄆㄇㄈ注音符号ボポモフォ",
michael@0 228 "xn--jckteuaez1shij4450gylvccz9asi4e",
michael@0 229 false, false, false],
michael@0 230
michael@0 231 // bopomofo, katakana, han
michael@0 232 ["ㄅㄆㄇㄈボポモフォ注音符号",
michael@0 233 "xn--jckteuaez1shij9450gylvccz9asi4e",
michael@0 234 false, false, false],
michael@0 235
michael@0 236 // katakana, Han, bopomofo
michael@0 237 ["ボポモフォ注音符号ㄅㄆㄇㄈ",
michael@0 238 "xn--jckteuaez6shij0450gylvccz9asi4e",
michael@0 239 false, false, false],
michael@0 240
michael@0 241 // katakana, bopomofo, Han
michael@0 242 ["ボポモフォㄅㄆㄇㄈ注音符号",
michael@0 243 "xn--jckteuaez6shij4450gylvccz9asi4e",
michael@0 244 false, false, false],
michael@0 245
michael@0 246 // Han, Hangul and Latin
michael@0 247 ["韓한글hangul",
michael@0 248 "xn--hangul-2m5ti09k79ze", false, true, true],
michael@0 249
michael@0 250 // Han, Latin and Hangul
michael@0 251 ["韓hangul한글",
michael@0 252 "xn--hangul-2m5to09k79ze", false, true, true],
michael@0 253
michael@0 254 // Hangul, Han and Latin
michael@0 255 ["한글韓hangul",
michael@0 256 "xn--hangul-2m5th09k79ze", false, true, true],
michael@0 257
michael@0 258 // Hangul, Latin and Han
michael@0 259 ["한글hangul韓",
michael@0 260 "xn--hangul-8m5t898k79ze", false, true, true],
michael@0 261
michael@0 262 // Latin, Han and Hangul
michael@0 263 ["hangul韓한글",
michael@0 264 "xn--hangul-8m5ti09k79ze", false, true, true],
michael@0 265
michael@0 266 // Latin, Hangul and Han
michael@0 267 ["hangul한글韓",
michael@0 268 "xn--hangul-8m5th09k79ze", false, true, true],
michael@0 269
michael@0 270 // Hangul and katakana
michael@0 271 ["한글ハングル",
michael@0 272 "xn--qck1c2d4a9266lkmzb", false, false, false],
michael@0 273
michael@0 274 // Katakana and Hangul
michael@0 275 ["ハングル한글",
michael@0 276 "xn--qck1c2d4a2366lkmzb", false, false, false],
michael@0 277
michael@0 278 // Thai (also tests that node with over 63 UTF-8 octets doesn't fail)
michael@0 279 ["เครื่องทําน้ําทําน้ําแข็ง",
michael@0 280 "xn--22cdjb2fanb9fyepcbbb9dwh4a3igze4fdcd",
michael@0 281 false, true, true]
michael@0 282 ];
michael@0 283
michael@0 284
// Restriction profiles to exercise. Order matters: profiles[i] selects
// column 2 + i of each testcases entry as the expected outcome.
const profiles = ["ASCII", "high", "moderate"];

/**
 * Test entry point.
 *
 * For each restriction profile, converts every testcases label (with ".com"
 * appended) through nsIIDNService.convertToDisplayIDN and checks whether the
 * result is the Unicode form or the Punycode/normalized form, as dictated by
 * the entry's expectation column for that profile.
 *
 * The two prefs modified by the test are restored on exit, including when a
 * check or a conversion throws part-way through.
 */
function run_test() {
  var pbi = Cc["@mozilla.org/preferences-service;1"].getService(Ci.nsIPrefBranch);
  var oldProfile = pbi.getCharPref("network.IDN.restriction_profile", "moderate");

  // Declared and assigned under the same spelling, so the saved value lives in
  // a local instead of leaking into an implicit global.
  var oldWhitelistCom;
  try {
    oldWhitelistCom = pbi.getBoolPref("network.IDN.whitelist.com");
  } catch (e) {
    // Reading the pref failed; it will be set back to false at the end.
    oldWhitelistCom = false;
  }
  var idnService = Cc["@mozilla.org/network/idn-service;1"].getService(Ci.nsIIDNService);

  try {
    for (var i = 0; i < profiles.length; ++i) {
      pbi.setCharPref("network.IDN.restriction_profile", profiles[i]);
      // NOTE(review): presumably disabled so the whitelist cannot override the
      // profile under test -- confirm against the IDN service.
      pbi.setBoolPref("network.IDN.whitelist.com", false);

      dump("testing " + profiles[i] + " profile");

      for (var j = 0; j < testcases.length; ++j) {
        var test = testcases[j];
        var unicodeURL = test[0] + ".com";
        var punycodeURL = test[1] + ".com";
        var expectedUnicode = test[2 + i];
        // Holder for the second argument of convertToDisplayIDN; its value is
        // never inspected by this test.
        var isASCII = {};

        var result;
        try {
          result = idnService.convertToDisplayIDN(unicodeURL, isASCII);
        } catch (e) {
          // A failed conversion is compared as an empty label plus ".com",
          // which matches entries whose second column is "".
          result = ".com";
        }

        if (punycodeURL.substring(0, 4) === "xn--") {
          // test convertToDisplayIDN with a Unicode URL and with a
          // Punycode URL if we have one
          var expected = escape(expectedUnicode ? unicodeURL : punycodeURL);
          do_check_eq(escape(result), expected);

          result = idnService.convertToDisplayIDN(punycodeURL, isASCII);
          do_check_eq(escape(result), expected);
        } else {
          // The "punycode" URL isn't punycode. This happens in testcases
          // where the Unicode URL has become normalized to an ASCII URL,
          // so, even though expectedUnicode is true, the expected result
          // is equal to punycodeURL
          do_check_eq(escape(result), escape(punycodeURL));
        }
      }
    }
  } finally {
    // Always put the prefs back, even if a check above threw.
    pbi.setBoolPref("network.IDN.whitelist.com", oldWhitelistCom);
    pbi.setCharPref("network.IDN.restriction_profile", oldProfile);
  }
}

mercurial