|
/* Test case for bug 340714
 *
 * Uses nsIConverterInputStream to decode UTF-16 text with all combinations
 * of UTF-16BE and UTF-16LE with and without BOM.
 *
 * Sample text is: "Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему."
 *
 * The enclosing quotation marks are included in the sample text to test that
 * UTF-16LE is recognized even when there is no BOM and the UTF-16LE decoder is
 * not explicitly called. This only works when the first character of the text
 * is an eight-bit character.
 */
|
13 |
|
// Percent-encoded byte-order marks for the two UTF-16 byte orders.
const beBOM = "%FE%FF";
const leBOM = "%FF%FE";

// The sample sentence as percent-encoded UTF-16BE bytes. The literal is split
// into one chunk per word (each chunk carries the space that follows the
// word); the first and last chunks are the enclosing quotation marks.
const sampleUTF16BE =
  "%00%22" +
  "%04%12%04%41%04%35%00%20" +
  "%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%00%20" +
  "%04%41%04%35%04%3C%04%4C%04%38%00%20" +
  "%04%3F%04%3E%04%45%04%3E%04%36%04%38%00%20" +
  "%04%34%04%40%04%43%04%33%00%20" +
  "%04%3D%04%30%00%20" +
  "%04%34%04%40%04%43%04%33%04%30%00%2C%00%20" +
  "%04%3A%04%30%04%36%04%34%04%30%04%4F%00%20" +
  "%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%00%20" +
  "%04%41%04%35%04%3C%04%4C%04%4F%00%20" +
  "%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%00%20" +
  "%04%3F%04%3E%00%2D%04%41%04%32%04%3E%04%35%04%3C%04%43%00%2E" +
  "%00%22";

// The same sentence as percent-encoded UTF-16LE bytes: every byte pair of the
// big-endian sample with its two bytes swapped, chunked the same way.
const sampleUTF16LE =
  "%22%00" +
  "%12%04%41%04%35%04%20%00" +
  "%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%04%20%00" +
  "%41%04%35%04%3C%04%4C%04%38%04%20%00" +
  "%3F%04%3E%04%45%04%3E%04%36%04%38%04%20%00" +
  "%34%04%40%04%43%04%33%04%20%00" +
  "%3D%04%30%04%20%00" +
  "%34%04%40%04%43%04%33%04%30%04%2C%00%20%00" +
  "%3A%04%30%04%36%04%34%04%30%04%4F%04%20%00" +
  "%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%04%20%00" +
  "%41%04%35%04%3C%04%4C%04%4F%04%20%00" +
  "%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%20%00" +
  "%3F%04%3E%04%2D%00%41%04%32%04%3E%04%35%04%3C%04%43%04%2E%00" +
  "%22%00";

// The decoded string every test case must produce, chunked per word like the
// encoded samples above.
const expected =
  "\"" +
  "\u0412\u0441\u0435 " +
  "\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 " +
  "\u0441\u0435\u043C\u044C\u0438 " +
  "\u043F\u043E\u0445\u043E\u0436\u0438 " +
  "\u0434\u0440\u0443\u0433 " +
  "\u043D\u0430 " +
  "\u0434\u0440\u0443\u0433\u0430, " +
  "\u043A\u0430\u0436\u0434\u0430\u044F " +
  "\u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F " +
  "\u0441\u0435\u043C\u044C\u044F " +
  "\u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 " +
  "\u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443." +
  "\"";
|
19 |
|
/**
 * Build the percent-encoded payload for one test case.
 *
 * The sample is selected with an explicit switch rather than by eval()-ing a
 * variable name built from the argument, so a malformed charset value can
 * never be executed as code.
 *
 * @param {boolean} withBOM  when truthy, the byte-order mark matching the
 *                           chosen sample is prepended.
 * @param {string}  charset  which sample to use: "UTF16BE" or "UTF16LE".
 * @returns {string} the percent-encoded sample, optionally preceded by its BOM.
 * @throws {Error} when charset names neither of the two samples.
 */
function makeText(withBOM, charset)
{
  var theText;
  var bom;

  switch (charset) {
    case "UTF16BE":
      theText = sampleUTF16BE;
      bom = beBOM;
      break;
    case "UTF16LE":
      theText = sampleUTF16LE;
      bom = leBOM;
      break;
    default:
      throw new Error("makeText: unknown sample charset \"" + charset + "\"");
  }

  return withBOM ? bom + theText : theText;
}
|
32 |
|
/**
 * Decode one sample through nsIConverterInputStream and check the result.
 *
 * The sample is wrapped in a data: URI, opened as a channel, and read line by
 * line through a converter input stream created with the requested decoder.
 * The concatenated lines are compared against `expected`; on a mismatch the
 * test parameters and the differing code units are dumped before the check.
 *
 * @param {boolean} withBOM       whether the sample is prefixed with a BOM.
 * @param {string}  charset       sample selector passed to makeText
 *                                ("UTF16BE" or "UTF16LE").
 * @param {string}  charsetDec    charset label written into the data: URI.
 * @param {string}  decoder       charset name handed to the converter stream.
 * @param {number}  bufferLength  buffer size handed to the converter stream.
 * @throws {Error} when the converter does not implement
 *                 nsIUnicharLineInputStream.
 */
function testCase(withBOM, charset, charsetDec, decoder, bufferLength)
{
  var dataURI = "data:text/plain;charset=" + charsetDec + "," +
                makeText(withBOM, charset);

  var IOService = Components.Constructor("@mozilla.org/network/io-service;1",
                                         "nsIIOService");
  var ConverterInputStream =
      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                             "nsIConverterInputStream",
                             "init");

  var ios = new IOService();
  var channel = ios.newChannel(dataURI, "", null);
  var testInputStream = channel.open();
  // 0xFFFD (U+FFFD REPLACEMENT CHARACTER) is the last init() argument.
  var testConverter = new ConverterInputStream(testInputStream,
                                               decoder,
                                               bufferLength,
                                               0xFFFD);

  var outStr = "";
  try {
    if (!(testConverter instanceof
          Components.interfaces.nsIUnicharLineInputStream))
      throw new Error("not line input stream");

    var more;
    do {
      // read the line and check for eof
      var line = {};
      more = testConverter.readLine(line);
      outStr += line.value;
    } while (more);
  } finally {
    // Release the stream even when reading throws, so a failing case does
    // not leave it open for the remaining cases.
    testConverter.close();
  }

  if (outStr !== expected) {
    dump("Failed with BOM = " + withBOM + "; charset = " + charset +
         "; charset declaration = " + charsetDec + "; decoder = " + decoder +
         "; bufferLength = " + bufferLength + "\n");
    if (outStr.length === expected.length) {
      // `i` is declared locally; assigning an undeclared `i` would create a
      // global (or throw in strict code).
      for (var i = 0; i < outStr.length; ++i) {
        if (outStr.charCodeAt(i) !== expected.charCodeAt(i)) {
          dump(i + ": " + outStr.charCodeAt(i).toString(16) + " != " +
               expected.charCodeAt(i).toString(16) + "\n");
        }
      }
    } else {
      dump("length mismatch: " + outStr.length + " != " + expected.length +
           "\n");
    }
  }

  // escape the strings before comparing for better readability
  do_check_eq(escape(outStr), escape(expected));
}
|
82 |
|
/**
 * Entry point: runs testCase for every combination of buffer length (64, 65),
 * BOM presence (with, without) and sample/decoder pairing. The charset
 * declaration in the data: URI is "UTF-16" for all of them.
 */
function run_test()
{
  // [sample charset, decoder] pairs, in the order they are exercised.
  var pairings = [
    ["UTF16LE", "UTF-16"],
    ["UTF16BE", "UTF-16"],
    ["UTF16LE", "UTF-16LE"],
    ["UTF16BE", "UTF-16BE"]
  ];
  var bomStates = [true, false];
  var bufferLengths = [64, 65];

  for (var b = 0; b < bufferLengths.length; ++b) {
    for (var w = 0; w < bomStates.length; ++w) {
      for (var p = 0; p < pairings.length; ++p) {
        testCase(bomStates[w], pairings[p][0], "UTF-16", pairings[p][1],
                 bufferLengths[b]);
      }
    }
  }
}