|
1 // Tests illegal UTF-8 sequences |
|
2 |
|
// Shorthand aliases for the XPCOM helpers used by this test:
//   Cc - Components.Constructor, used below to build component constructors.
//   Ci - Components.interfaces, used below for interface checks.
const { Constructor: Cc, interfaces: Ci } = Components;
|
5 |
|
/**
 * Table of malformed UTF-8 inputs, grouped by the output they must produce.
 *
 * Each entry has:
 *   inStrings - percent-encoded byte sequences; each one is tested on its own,
 *               placed between "ABC" and "XYZ".
 *   expected  - the decoded text every sequence in the group must yield, i.e.
 *               "ABC" + N * U+FFFD + "XYZ".
 *
 * In the first group every sequence collapses to a single U+FFFD; in all other
 * groups each input byte produces its own U+FFFD.
 */
const tests = [
  { inStrings: ["%80",               // Illegal or incomplete sequences
                "%8f",
                "%90",
                "%9f",
                "%a0",
                "%bf",
                "%c0",
                "%c1",
                "%c2",
                "%df",
                "%e0",
                "%e0%a0",
                "%e0%bf",
                "%ed%80",
                "%ed%9f",
                "%ef",
                "%ef%bf",
                "%f0",
                "%f0%90",
                "%f0%90%80",
                "%f0%90%bf",
                "%f0%bf",
                "%f0%bf%80",
                "%f0%bf%bf",
                "%f4",
                "%f4%80",
                "%f4%80%80",
                "%f4%80%bf",
                "%f4%8f",
                "%f4%8f%80",
                "%f4%8f%bf",
                "%f5",
                "%f7",
                "%f8",
                "%fb",
                "%fc",
                "%fd"],
    expected: "ABC\ufffdXYZ" },

  { inStrings: ["%c0%af",            // Illegal bytes in 2-octet
                "%c1%af"],           // sequences
    expected: "ABC\ufffd\ufffdXYZ" },

  { inStrings: ["%e0%80%80",         // Illegal bytes in 3-octet
                "%e0%80%af",         // sequences
                "%e0%9f%bf",
                // lone surrogates
                "%ed%a0%80",         // D800
                "%ed%ad%bf",         // DB7F
                "%ed%ae%80",         // DB80
                "%ed%af%bf",         // DBFF
                "%ed%b0%80",         // DC00
                "%ed%be%80",         // DF80
                "%ed%bf%bf"],        // DFFF
    expected: "ABC\ufffd\ufffd\ufffdXYZ" },

  { inStrings: ["%f0%80%80%80",      // Illegal bytes in 4-octet
                "%f0%80%80%af",      // sequences
                "%f0%8f%bf%bf",
                "%f4%90%80%80",
                "%f4%bf%bf%bf",
                "%f5%80%80%80",
                "%f7%bf%bf%bf"],
    expected: "ABC\ufffd\ufffd\ufffd\ufffdXYZ" },

  { inStrings: ["%f8%80%80%80%80",   // Illegal bytes in 5-octet
                "%f8%80%80%80%af",   // sequences
                "%fb%bf%bf%bf%bf"],
    expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffdXYZ" },

  // Six-byte inputs: UTF-8-encoded surrogate pairs and 6-octet sequences share
  // one group because both must decode to six replacement characters.
  { inStrings: ["%ed%a0%80%ed%b0%80",  // D800 DC00
                "%ed%a0%80%ed%bf%bf",  // D800 DFFF
                "%ed%ad%bf%ed%b0%80",  // DB7F DC00
                "%ed%ad%bf%ed%bf%bf",  // DB7F DFFF
                "%ed%ae%80%ed%b0%80",  // DB80 DC00
                "%ed%ae%80%ed%bf%bf",  // DB80 DFFF
                "%ed%af%bf%ed%b0%80",  // DBFF DC00
                "%ed%af%bf%ed%bf%bf",  // DBFF DFFF
                "%fc%80%80%80%80%80",  // Illegal bytes in 6-octet
                "%fc%80%80%80%80%af",  // sequences
                "%fd%bf%bf%bf%bf%bf"],
    expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdXYZ" },
];
|
91 |
|
92 |
|
/**
 * Decodes one percent-encoded byte sequence through the UTF-8 converter input
 * stream and checks the resulting text.
 *
 * The sequence is wrapped as "ABC" + inStr + "XYZ" in a text/plain data: URI
 * declared as UTF-8. The URI is opened as a channel and read line by line
 * through a converter input stream; all lines are concatenated and compared
 * with the expected text (both value and length).
 *
 * @param {string} inStr - Percent-encoded bytes to place between "ABC" and "XYZ".
 * @param {string} expected - Text the converter is expected to produce.
 * @throws {Error} If the converter stream is not an nsIUnicharLineInputStream.
 */
function testCaseInputStream(inStr, expected)
{
  const dataURI = `data:text/plain; charset=UTF-8,ABC${inStr}XYZ`;
  dump(`${inStr}==>`);

  const IOService = Cc("@mozilla.org/network/io-service;1",
                       "nsIIOService");
  const ConverterInputStream =
      Cc("@mozilla.org/intl/converter-input-stream;1",
         "nsIConverterInputStream",
         "init");

  const ios = new IOService();
  const channel = ios.newChannel(dataURI, "", null);
  const testInputStream = channel.open();
  // Arguments are forwarded to the component's init():
  // source stream, charset, buffer size, replacement character (U+FFFD).
  const testConverter = new ConverterInputStream(testInputStream,
                                                 "UTF-8",
                                                 16,
                                                 0xFFFD);

  let outStr = "";
  try {
    if (!(testConverter instanceof Ci.nsIUnicharLineInputStream)) {
      throw new Error("not line input stream");
    }

    let more;
    do {
      // readLine() fills line.value and returns false once the end is reached.
      const line = {};
      more = testConverter.readLine(line);
      outStr += line.value;
    } while (more);
  } finally {
    // Always release the stream, even when the check or a read throws.
    testConverter.close();
  }

  dump(`${outStr}; expected=${expected}\n`);
  do_check_eq(outStr, expected);
  do_check_eq(outStr.length, expected.length);
}
|
129 |
|
/**
 * Test entry point: feeds every input string of every group in `tests` to
 * testCaseInputStream together with that group's expected output.
 */
function run_test() {
  for (const { inStrings, expected } of tests) {
    for (const inStr of inStrings) {
      testCaseInputStream(inStr, expected);
    }
  }
}