// Test algorithm for unicode display of IDNA URL (bug 722299)

/**
 * Table of test cases. Each entry is an array of the form
 *
 *   [label, punycodeOrNormalized, expectAscii, expectHigh, expectModerate]
 *
 * - label:                the original host label (".com" is appended by run_test).
 * - punycodeOrNormalized: the Punycode form of the label, or the normalized
 *                         ASCII form when the label does not stay an IDN
 *                         (or "" when conversion is expected to fail).
 * - expect*:              whether the label is expected to be displayed in
 *                         Unicode (true) or Punycode (false) under the
 *                         "ASCII", "high" and "moderate" restriction profiles,
 *                         in that order (same order as `profiles`).
 */
const testcases = [
  //  Original             Punycode or         Expected UTF-8 by profile
  //    URL              normalized form      ASCII-Only, High, Moderate
  //
  // Latin script
  ["cuillère", "xn--cuillre-6xa", false, true, true],

  // repeated non-spacing marks
  ["gruz̀̀ere", "xn--gruzere-ogea", false, false, false],

  // non-XID character
  ["I♥NY", "xn--iny-zx5a", false, false, false],

  // new non-XID character in Unicode 6.3
  ["حلا\u061cل", "xn--bgbvr6gc", false, false, false],

  // U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
  ["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup", false, true, true],

  // Cyrillic alone
  ["толсто́й", "xn--lsa83dealbred", false, true, true],

  // Mixed script Cyrillic/Latin
  ["толсто́й-in-Russian",
   "xn---in-russian-1jg071b0a8bb4cpd", false, false, false],

  // Mixed script Latin/Cyrillic
  ["war-and-миръ", "xn--war-and--b9g3b7b3h", false, false, false],

  // Cherokee (Restricted script)
  ["ᏣᎳᎩ", "xn--f9dt7l", false, false, false],

  // Yi (Aspirational script)
  ["ꆈꌠꁱꂷ", "xn--4o7a6e1x64c", false, true, true],

  // Greek alone
  ["πλάτων", "xn--hxa3ahjw4a", false, true, true],

  // Mixed script Greek/Latin
  ["πλάτωνicrelationship",
   "xn--icrelationship-96j4t9a3cwe2e", false, false, false],

  // Mixed script Latin/Greek
  ["spaceὈδύσσεια", "xn--space-h9dui0b0ga2j1562b", false, false, false],

  // Devanagari alone
  ["मराठी", "xn--d2b1ag0dl", false, true, true],

  // Devanagari with Armenian
  ["मराठीՀայաստան",
   "xn--y9aaa1d0ai1cq964f8dwa2o1a", false, false, false],

  // Devanagari with common
  ["मराठी123", "xn--123-mhh3em2hra", false, true, true],

  // Common with Devanagari
  ["123मराठी", "xn--123-phh3em2hra", false, true, true],

  // Latin with Han
  ["chairman毛",
   "xn--chairman-k65r", false, true, true],

  // Han with Latin
  ["山葵sauce", "xn--sauce-6j9ii40v", false, true, true],

  // Latin with Han, Hiragana and Katakana
  ["van語ではドイ", "xn--van-ub4bpb6w0in486d", false, true, true],

  // Latin with Han, Katakana and Hiragana
  ["van語ドイでは", "xn--van-ub4bpb4w0ip486d", false, true, true],

  // Latin with Hiragana, Han and Katakana
  ["vanでは語ドイ", "xn--van-ub4bpb6w0ip486d", false, true, true],

  // Latin with Hiragana, Katakana and Han
  ["vanではドイ語", "xn--van-ub4bpb6w0ir486d", false, true, true],

  // Latin with Katakana, Han and Hiragana
  ["vanドイ語では", "xn--van-ub4bpb4w0ir486d", false, true, true],

  // Latin with Katakana, Hiragana and Han
  ["vanドイでは語", "xn--van-ub4bpb4w0it486d", false, true, true],

  // Han with Latin, Hiragana and Katakana
  ["語vanではドイ", "xn--van-ub4bpb6w0ik486d", false, true, true],

  // Han with Latin, Katakana and Hiragana
  ["語vanドイでは", "xn--van-ub4bpb4w0im486d", false, true, true],

  // Han with Hiragana, Latin and Katakana
  ["語ではvanドイ", "xn--van-rb4bpb9w0ik486d", false, true, true],

  // Han with Hiragana, Katakana and Latin
  ["語ではドイvan", "xn--van-rb4bpb6w0in486d", false, true, true],

  // Han with Katakana, Latin and Hiragana
  ["語ドイvanでは", "xn--van-ub4bpb1w0ip486d", false, true, true],

  // Han with Katakana, Hiragana and Latin
  ["語ドイではvan", "xn--van-rb4bpb4w0ip486d", false, true, true],

  // Hiragana with Latin, Han and Katakana
  ["イツvan語ではド", "xn--van-ub4bpb1wvhsbx330n", false, true, true],

  // Hiragana with Latin, Katakana and Han
  ["ではvanドイ語", "xn--van-rb4bpb9w0ir486d", false, true, true],

  // Hiragana with Han, Latin and Katakana
  ["では語vanドイ", "xn--van-rb4bpb9w0im486d", false, true, true],

  // Hiragana with Han, Katakana and Latin
  ["では語ドイvan", "xn--van-rb4bpb6w0ip486d", false, true, true],

  // Hiragana with Katakana, Latin and Han
  ["ではドイvan語", "xn--van-rb4bpb6w0iu486d", false, true, true],

  // Hiragana with Katakana, Han and Latin
  ["ではドイ語van", "xn--van-rb4bpb6w0ir486d", false, true, true],

  // Katakana with Latin, Han and Hiragana
  ["ドイvan語では", "xn--van-ub4bpb1w0iu486d", false, true, true],

  // Katakana with Latin, Hiragana and Han
  ["ドイvanでは語", "xn--van-ub4bpb1w0iw486d", false, true, true],

  // Katakana with Han, Latin and Hiragana
  ["ドイ語vanでは", "xn--van-ub4bpb1w0ir486d", false, true, true],

  // Katakana with Han, Hiragana and Latin
  ["ドイ語ではvan", "xn--van-rb4bpb4w0ir486d", false, true, true],

  // Katakana with Hiragana, Latin and Han
  ["ドイではvan語", "xn--van-rb4bpb4w0iw486d", false, true, true],

  // Katakana with Hiragana, Han and Latin
  ["ドイでは語van", "xn--van-rb4bpb4w0it486d", false, true, true],

  // Han with common
  ["中国123", "xn--123-u68dy61b", false, true, true],

  // common with Han
  ["123中国", "xn--123-x68dy61b", false, true, true],

  // Characters that normalize to permitted characters
  //  (also tests Plane 1 supplementary characters)
  ["super𝟖", "super8", true, true, true],

  // Han from Plane 2
  ["𠀀𠀁𠀂", "xn--j50icd", false, true, true],

  // Han from Plane 2 with js (UTF-16) escapes
  ["\uD840\uDC00\uD840\uDC01\uD840\uDC02",
   "xn--j50icd", false, true, true],

  // Same with a lone high surrogate at the end
  ["\uD840\uDC00\uD840\uDC01\uD840", "", false, false, false],

  // Latin text and Bengali digits
  ["super৪", "xn--super-k2l", false, false, true],

  // Bengali digits and Latin text
  ["৫ab", "xn--ab-x5f", false, false, true],

  // Bengali text and Latin digits
  ["অঙ্কুর8", "xn--8-70d2cp0j6dtd", false, true, true],

  // Latin digits and Bengali text
  ["5াব", "xn--5-h3d7c", false, true, true],

  // Mixed numbering systems
  ["٢٠۰٠", "xn--8hbae38c", false, false, false],

  // Traditional Chinese
  ["萬城", "xn--uis754h", false, true, true],

  // Simplified Chinese
  ["万城", "xn--chq31v", false, true, true],

  // Simplified-only and Traditional-only Chinese in the same label
  ["万萬城", "xn--chq31vsl1b", false, true, true],

  // Traditional-only and Simplified-only Chinese in the same label
  ["萬万城", "xn--chq31vrl1b", false, true, true],

  // Han and Latin and Bopomofo
  ["注音符号bopomofoㄅㄆㄇㄈ",
   "xn--bopomofo-hj5gkalm1637i876cuw0brk5f",
   false, true, true],

  // Han, bopomofo, Latin
  ["注音符号ㄅㄆㄇㄈbopomofo",
   "xn--bopomofo-8i5gkalm9637i876cuw0brk5f",
   false, true, true],

  // Latin, Han, Bopomofo
  ["bopomofo注音符号ㄅㄆㄇㄈ",
   "xn--bopomofo-hj5gkalm9637i876cuw0brk5f",
   false, true, true],

  // Latin, Bopomofo, Han
  ["bopomofoㄅㄆㄇㄈ注音符号",
   "xn--bopomofo-hj5gkalm3737i876cuw0brk5f",
   false, true, true],

  // Bopomofo, Han, Latin
  ["ㄅㄆㄇㄈ注音符号bopomofo",
   "xn--bopomofo-8i5gkalm3737i876cuw0brk5f",
   false, true, true],

  // Bopomofo, Latin, Han
  ["ㄅㄆㄇㄈbopomofo注音符号",
   "xn--bopomofo-8i5gkalm1837i876cuw0brk5f",
   false, true, true],

  // Han, bopomofo and katakana
  ["注音符号ㄅㄆㄇㄈボポモフォ",
   "xn--jckteuaez1shij0450gylvccz9asi4e",
   false, false, false],

  // Han, katakana, bopomofo
  ["注音符号ボポモフォㄅㄆㄇㄈ",
   "xn--jckteuaez6shij5350gylvccz9asi4e",
   false, false, false],

  // bopomofo, han, katakana
  ["ㄅㄆㄇㄈ注音符号ボポモフォ",
   "xn--jckteuaez1shij4450gylvccz9asi4e",
   false, false, false],

  // bopomofo, katakana, han
  ["ㄅㄆㄇㄈボポモフォ注音符号",
   "xn--jckteuaez1shij9450gylvccz9asi4e",
   false, false, false],

  // katakana, Han, bopomofo
  ["ボポモフォ注音符号ㄅㄆㄇㄈ",
   "xn--jckteuaez6shij0450gylvccz9asi4e",
   false, false, false],

  // katakana, bopomofo, Han
  ["ボポモフォㄅㄆㄇㄈ注音符号",
   "xn--jckteuaez6shij4450gylvccz9asi4e",
   false, false, false],

  // Han, Hangul and Latin
  ["韓한글hangul",
   "xn--hangul-2m5ti09k79ze", false, true, true],

  // Han, Latin and Hangul
  ["韓hangul한글",
   "xn--hangul-2m5to09k79ze", false, true, true],

  // Hangul, Han and Latin
  ["한글韓hangul",
   "xn--hangul-2m5th09k79ze", false, true, true],

  // Hangul, Latin and Han
  ["한글hangul韓",
   "xn--hangul-8m5t898k79ze", false, true, true],

  // Latin, Han and Hangul
  ["hangul韓한글",
   "xn--hangul-8m5ti09k79ze", false, true, true],

  // Latin, Hangul and Han
  ["hangul한글韓",
   "xn--hangul-8m5th09k79ze", false, true, true],

  // Hangul and katakana
  ["한글ハングル",
   "xn--qck1c2d4a9266lkmzb", false, false, false],

  // Katakana and Hangul
  ["ハングル한글",
   "xn--qck1c2d4a2366lkmzb", false, false, false],

  // Thai (also tests that node with over 63 UTF-8 octets doesn't fail)
  ["เครื่องทําน้ําทําน้ําแข็ง",
   "xn--22cdjb2fanb9fyepcbbb9dwh4a3igze4fdcd",
   false, true, true]
];


// Values written to "network.IDN.restriction_profile", in the same order as
// the three boolean expectation columns of each testcases entry.
const profiles = ["ASCII", "high", "moderate"];

/**
 * Test entry point.
 *
 * For each restriction profile in `profiles`, sets the
 * "network.IDN.restriction_profile" pref, forces "network.IDN.whitelist.com"
 * to false, and checks the result of nsIIDNService.convertToDisplayIDN for
 * every entry of `testcases` (both for the original label and, when there is
 * one, for its Punycode form).
 *
 * The values both prefs had on entry are written back when the function exits,
 * including when a check or a service call throws.
 */
function run_test() {
  const pbi = Cc["@mozilla.org/preferences-service;1"].getService(Ci.nsIPrefBranch);
  const oldProfile = pbi.getCharPref("network.IDN.restriction_profile", "moderate");
  let oldWhitelistCom;
  try {
    oldWhitelistCom = pbi.getBoolPref("network.IDN.whitelist.com");
  } catch (e) {
    // Reading the pref failed (presumably because it is not set); treat the
    // previous value as false.
    oldWhitelistCom = false;
  }
  const idnService = Cc["@mozilla.org/network/idn-service;1"].getService(Ci.nsIIDNService);

  try {
    for (let i = 0; i < profiles.length; ++i) {
      pbi.setCharPref("network.IDN.restriction_profile", profiles[i]);
      pbi.setBoolPref("network.IDN.whitelist.com", false);

      dump("testing " + profiles[i] + " profile");

      for (let j = 0; j < testcases.length; ++j) {
        const test = testcases[j];
        const URL = test[0] + ".com";
        const punycodeURL = test[1] + ".com";
        // Columns 2..4 of the test case line up with profiles[0..2].
        const expectedUnicode = test[2 + i];
        const isASCII = {};

        let result;
        try {
          result = idnService.convertToDisplayIDN(URL, isASCII);
        } catch (e) {
          // A failed conversion is represented as an empty label, matching
          // test cases whose expected form is "".
          result = ".com";
        }
        if (punycodeURL.slice(0, 4) === "xn--") {
          // test convertToDisplayIDN with a Unicode URL and with a
          //  Punycode URL if we have one
          do_check_eq(escape(result),
                      expectedUnicode ? escape(URL) : escape(punycodeURL));

          result = idnService.convertToDisplayIDN(punycodeURL, isASCII);
          do_check_eq(escape(result),
                      expectedUnicode ? escape(URL) : escape(punycodeURL));
        } else {
          // The "punycode" URL isn't punycode. This happens in testcases
          // where the Unicode URL has become normalized to an ASCII URL,
          // so, even though expectedUnicode is true, the expected result
          // is equal to punycodeURL
          do_check_eq(escape(result), escape(punycodeURL));
        }
      }
    }
  } finally {
    // Always put the prefs back so a failing check cannot affect later tests.
    pbi.setBoolPref("network.IDN.whitelist.com", oldWhitelistCom);
    pbi.setCharPref("network.IDN.restriction_profile", oldProfile);
  }
}