// Test the algorithm for Unicode display of IDNA URLs (bug 722299)

/**
 * Table of IDN display test cases.
 *
 * Each entry has the shape
 *
 *   [label, punycodeOrNormalized, asciiOnly, high, moderate]
 *
 * - label:                the original (Unicode) host label
 * - punycodeOrNormalized: its Punycode form, or the ASCII form the label
 *                         normalizes to (empty when no valid form exists)
 * - asciiOnly / high / moderate:
 *                         whether the label is expected to be displayed as
 *                         UTF-8 under the "ASCII", "high" and "moderate"
 *                         restriction profiles respectively
 */
const testcases = [
  // Latin script
  ["cuillère", "xn--cuillre-6xa", false, true, true],

  // repeated non-spacing marks
  ["gruz̀̀ere", "xn--gruzere-ogea", false, false, false],

  // non-XID character
  ["I♥NY", "xn--iny-zx5a", false, false, false],

  // new non-XID character in Unicode 6.3
  ["حلا\u061cل", "xn--bgbvr6gc", false, false, false],

  // U+30FB KATAKANA MIDDLE DOT is excluded from non-XID characters (bug 857490)
  ["乾燥肌・石けん", "xn--08j4gylj12hz80b0uhfup", false, true, true],

  // Cyrillic alone
  ["толсто́й", "xn--lsa83dealbred", false, true, true],

  // Mixed script Cyrillic/Latin
  ["толсто́й-in-Russian",
   "xn---in-russian-1jg071b0a8bb4cpd", false, false, false],

  // Mixed script Latin/Cyrillic
  ["war-and-миръ", "xn--war-and--b9g3b7b3h", false, false, false],

  // Cherokee (Restricted script)
  ["ᏣᎳᎩ", "xn--f9dt7l", false, false, false],

  // Yi (Aspirational script)
  ["ꆈꌠꁱꂷ", "xn--4o7a6e1x64c", false, true, true],

  // Greek alone
  ["πλάτων", "xn--hxa3ahjw4a", false, true, true],

  // Mixed script Greek/Latin
  ["πλάτωνicrelationship",
   "xn--icrelationship-96j4t9a3cwe2e", false, false, false],

  // Mixed script Latin/Greek
  ["spaceὈδύσσεια", "xn--space-h9dui0b0ga2j1562b", false, false, false],

  // Devanagari alone
  ["मराठी", "xn--d2b1ag0dl", false, true, true],

  // Devanagari with Armenian
  ["मराठीՀայաստան",
   "xn--y9aaa1d0ai1cq964f8dwa2o1a", false, false, false],

  // Devanagari with common
  ["मराठी123", "xn--123-mhh3em2hra", false, true, true],

  // Common with Devanagari
  ["123मराठी", "xn--123-phh3em2hra", false, true, true],

  // Latin with Han
  ["chairman毛",
   "xn--chairman-k65r", false, true, true],

  // Han with Latin
  ["山葵sauce", "xn--sauce-6j9ii40v", false, true, true],

  // Latin with Han, Hiragana and Katakana
  ["van語ではドイ", "xn--van-ub4bpb6w0in486d", false, true, true],

  // Latin with Han, Katakana and Hiragana
  ["van語ドイでは", "xn--van-ub4bpb4w0ip486d", false, true, true],

  // Latin with Hiragana, Han and Katakana
  ["vanでは語ドイ", "xn--van-ub4bpb6w0ip486d", false, true, true],

  // Latin with Hiragana, Katakana and Han
  ["vanではドイ語", "xn--van-ub4bpb6w0ir486d", false, true, true],

  // Latin with Katakana, Han and Hiragana
  ["vanドイ語では", "xn--van-ub4bpb4w0ir486d", false, true, true],

  // Latin with Katakana, Hiragana and Han
  ["vanドイでは語", "xn--van-ub4bpb4w0it486d", false, true, true],

  // Han with Latin, Hiragana and Katakana
  ["語vanではドイ", "xn--van-ub4bpb6w0ik486d", false, true, true],

  // Han with Latin, Katakana and Hiragana
  ["語vanドイでは", "xn--van-ub4bpb4w0im486d", false, true, true],

  // Han with Hiragana, Latin and Katakana
  ["語ではvanドイ", "xn--van-rb4bpb9w0ik486d", false, true, true],

  // Han with Hiragana, Katakana and Latin
  ["語ではドイvan", "xn--van-rb4bpb6w0in486d", false, true, true],

  // Han with Katakana, Latin and Hiragana
  ["語ドイvanでは", "xn--van-ub4bpb1w0ip486d", false, true, true],

  // Han with Katakana, Hiragana and Latin
  ["語ドイではvan", "xn--van-rb4bpb4w0ip486d", false, true, true],

  // Hiragana with Latin, Han and Katakana
  // NOTE(review): this label actually begins with Katakana (イツ), followed
  // by Latin, Han, Hiragana and a trailing Katakana character; the data is
  // left untouched.
  ["イツvan語ではド", "xn--van-ub4bpb1wvhsbx330n", false, true, true],

  // Hiragana with Latin, Katakana and Han
  ["ではvanドイ語", "xn--van-rb4bpb9w0ir486d", false, true, true],

  // Hiragana with Han, Latin and Katakana
  ["では語vanドイ", "xn--van-rb4bpb9w0im486d", false, true, true],

  // Hiragana with Han, Katakana and Latin
  ["では語ドイvan", "xn--van-rb4bpb6w0ip486d", false, true, true],

  // Hiragana with Katakana, Latin and Han
  ["ではドイvan語", "xn--van-rb4bpb6w0iu486d", false, true, true],

  // Hiragana with Katakana, Han and Latin
  ["ではドイ語van", "xn--van-rb4bpb6w0ir486d", false, true, true],

  // Katakana with Latin, Han and Hiragana
  ["ドイvan語では", "xn--van-ub4bpb1w0iu486d", false, true, true],

  // Katakana with Latin, Hiragana and Han
  ["ドイvanでは語", "xn--van-ub4bpb1w0iw486d", false, true, true],

  // Katakana with Han, Latin and Hiragana
  ["ドイ語vanでは", "xn--van-ub4bpb1w0ir486d", false, true, true],

  // Katakana with Han, Hiragana and Latin
  ["ドイ語ではvan", "xn--van-rb4bpb4w0ir486d", false, true, true],

  // Katakana with Hiragana, Latin and Han
  ["ドイではvan語", "xn--van-rb4bpb4w0iw486d", false, true, true],

  // Katakana with Hiragana, Han and Latin
  ["ドイでは語van", "xn--van-rb4bpb4w0it486d", false, true, true],

  // Han with common
  ["中国123", "xn--123-u68dy61b", false, true, true],

  // common with Han
  ["123中国", "xn--123-x68dy61b", false, true, true],

  // Characters that normalize to permitted characters
  //  (also tests Plane 1 supplementary characters)
  ["super𝟖", "super8", true, true, true],

  // Han from Plane 2
  ["𠀀𠀁𠀂", "xn--j50icd", false, true, true],

  // Han from Plane 2 with js (UTF-16) escapes
  ["\uD840\uDC00\uD840\uDC01\uD840\uDC02",
   "xn--j50icd", false, true, true],

  // Same with a lone high surrogate at the end
  ["\uD840\uDC00\uD840\uDC01\uD840", "", false, false, false],

  // Latin text and Bengali digits
  ["super৪", "xn--super-k2l", false, false, true],

  // Bengali digits and Latin text
  ["৫ab", "xn--ab-x5f", false, false, true],

  // Bengali text and Latin digits
  ["অঙ্কুর8", "xn--8-70d2cp0j6dtd", false, true, true],

  // Latin digits and Bengali text
  ["5াব", "xn--5-h3d7c", false, true, true],

  // Mixed numbering systems
  ["٢٠۰٠", "xn--8hbae38c", false, false, false],

  // Traditional Chinese
  ["萬城", "xn--uis754h", false, true, true],

  // Simplified Chinese
  ["万城", "xn--chq31v", false, true, true],

  // Simplified-only and Traditional-only Chinese in the same label
  ["万萬城", "xn--chq31vsl1b", false, true, true],

  // Traditional-only and Simplified-only Chinese in the same label
  ["萬万城", "xn--chq31vrl1b", false, true, true],

  // Han and Latin and Bopomofo
  ["注音符号bopomofoㄅㄆㄇㄈ",
   "xn--bopomofo-hj5gkalm1637i876cuw0brk5f",
   false, true, true],

  // Han, bopomofo, Latin
  ["注音符号ㄅㄆㄇㄈbopomofo",
   "xn--bopomofo-8i5gkalm9637i876cuw0brk5f",
   false, true, true],

  // Latin, Han, Bopomofo
  ["bopomofo注音符号ㄅㄆㄇㄈ",
   "xn--bopomofo-hj5gkalm9637i876cuw0brk5f",
   false, true, true],

  // Latin, Bopomofo, Han
  ["bopomofoㄅㄆㄇㄈ注音符号",
   "xn--bopomofo-hj5gkalm3737i876cuw0brk5f",
   false, true, true],

  // Bopomofo, Han, Latin
  ["ㄅㄆㄇㄈ注音符号bopomofo",
   "xn--bopomofo-8i5gkalm3737i876cuw0brk5f",
   false, true, true],

  // Bopomofo, Latin, Han
  ["ㄅㄆㄇㄈbopomofo注音符号",
   "xn--bopomofo-8i5gkalm1837i876cuw0brk5f",
   false, true, true],

  // Han, bopomofo and katakana
  ["注音符号ㄅㄆㄇㄈボポモフォ",
   "xn--jckteuaez1shij0450gylvccz9asi4e",
   false, false, false],

  // Han, katakana, bopomofo
  ["注音符号ボポモフォㄅㄆㄇㄈ",
   "xn--jckteuaez6shij5350gylvccz9asi4e",
   false, false, false],

  // bopomofo, han, katakana
  ["ㄅㄆㄇㄈ注音符号ボポモフォ",
   "xn--jckteuaez1shij4450gylvccz9asi4e",
   false, false, false],

  // bopomofo, katakana, han
  ["ㄅㄆㄇㄈボポモフォ注音符号",
   "xn--jckteuaez1shij9450gylvccz9asi4e",
   false, false, false],

  // katakana, Han, bopomofo
  ["ボポモフォ注音符号ㄅㄆㄇㄈ",
   "xn--jckteuaez6shij0450gylvccz9asi4e",
   false, false, false],

  // katakana, bopomofo, Han
  ["ボポモフォㄅㄆㄇㄈ注音符号",
   "xn--jckteuaez6shij4450gylvccz9asi4e",
   false, false, false],

  // Han, Hangul and Latin
  ["韓한글hangul",
   "xn--hangul-2m5ti09k79ze", false, true, true],

  // Han, Latin and Hangul
  ["韓hangul한글",
   "xn--hangul-2m5to09k79ze", false, true, true],

  // Hangul, Han and Latin
  ["한글韓hangul",
   "xn--hangul-2m5th09k79ze", false, true, true],

  // Hangul, Latin and Han
  ["한글hangul韓",
   "xn--hangul-8m5t898k79ze", false, true, true],

  // Latin, Han and Hangul
  ["hangul韓한글",
   "xn--hangul-8m5ti09k79ze", false, true, true],

  // Latin, Hangul and Han
  ["hangul한글韓",
   "xn--hangul-8m5th09k79ze", false, true, true],

  // Hangul and katakana
  ["한글ハングル",
   "xn--qck1c2d4a9266lkmzb", false, false, false],

  // Katakana and Hangul
  ["ハングル한글",
   "xn--qck1c2d4a2366lkmzb", false, false, false],

  // Thai (also tests that node with over 63 UTF-8 octets doesn't fail)
  ["เครื่องทําน้ําทําน้ําแข็ง",
   "xn--22cdjb2fanb9fyepcbbb9dwh4a3igze4fdcd",
   false, true, true]
];
|
283 |
|
284 |
|
// Restriction profiles to exercise; index i here selects column 2 + i of
// each testcases entry.
const profiles = ["ASCII", "high", "moderate"];

/**
 * Runs every entry of `testcases` through
 * nsIIDNService.convertToDisplayIDN under each profile in `profiles` and
 * checks whether the label is displayed as Unicode or as Punycode.
 *
 * The "network.IDN.restriction_profile" and "network.IDN.whitelist.com"
 * prefs are overridden for the duration of the run and put back afterwards,
 * even when a check throws.
 */
function run_test() {
  const PROFILE_PREF = "network.IDN.restriction_profile";
  const WHITELIST_PREF = "network.IDN.whitelist.com";

  const pbi = Cc["@mozilla.org/preferences-service;1"].getService(Ci.nsIPrefBranch);
  const oldProfile = pbi.getCharPref(PROFILE_PREF, "moderate");
  let oldWhitelistCom;
  try {
    oldWhitelistCom = pbi.getBoolPref(WHITELIST_PREF);
  } catch (e) {
    // Deliberate best effort: if the pref cannot be read, treat it as false.
    oldWhitelistCom = false;
  }
  const idnService = Cc["@mozilla.org/network/idn-service;1"].getService(Ci.nsIIDNService);

  try {
    for (let i = 0; i < profiles.length; ++i) {
      pbi.setCharPref(PROFILE_PREF, profiles[i]);
      // Disable the .com whitelist entry so it cannot influence the result
      // for the ".com" hosts built below.
      pbi.setBoolPref(WHITELIST_PREF, false);

      dump("testing " + profiles[i] + " profile\n");

      for (const test of testcases) {
        const unicodeURL = test[0] + ".com";
        const punycodeURL = test[1] + ".com";
        const expectedUnicode = test[2 + i];
        const isASCII = {}; // out-parameter object; its value is not inspected here

        let result;
        try {
          result = idnService.convertToDisplayIDN(unicodeURL, isASCII);
        } catch (e) {
          // A failed conversion is represented as an empty label plus ".com",
          // which matches the table entries whose second column is "".
          result = ".com";
        }

        if (punycodeURL.startsWith("xn--")) {
          // test convertToDisplayIDN with a Unicode URL and with a
          // Punycode URL if we have one
          const expected = escape(expectedUnicode ? unicodeURL : punycodeURL);
          do_check_eq(escape(result), expected);

          result = idnService.convertToDisplayIDN(punycodeURL, isASCII);
          do_check_eq(escape(result), expected);
        } else {
          // The "punycode" URL isn't punycode. This happens in testcases
          // where the Unicode URL has become normalized to an ASCII URL,
          // so, even though expectedUnicode is true, the expected result
          // is equal to punycodeURL
          do_check_eq(escape(result), escape(punycodeURL));
        }
      }
    }
  } finally {
    // Always put the prefs back, even if a check above threw.
    pbi.setBoolPref(WHITELIST_PREF, oldWhitelistCom);
    pbi.setCharPref(PROFILE_PREF, oldProfile);
  }
}