Wed, 31 Dec 2014 07:22:50 +0100
Correct previous dual key logic pending first delivery installment.
// Shorthand for the XPCOM registries exposed by the host's Components object.
const Cc = Components.classes;
const Ci = Components.interfaces;

// Value thrown by the charset readers in this file on malformed input.
const NS_ERROR_ILLEGAL_VALUE = Components.results.NS_ERROR_ILLEGAL_VALUE;

// Stream constructors; each one is assigned in run_test().
var BIS;
var BOS;
var _Pipe;
var COS;
var FIS;
var _SS;
var CIS;

// Directory holding the test data files; assigned in run_test().
var dataDir;
/**
 * Test entry point: creates the stream constructors used throughout this
 * file, locates the data directory, then runs every test in order.
 */
function run_test()
{
  // Builds a constructor for the given contract ID / interface that calls the
  // named initializer method with the constructor's arguments.
  function ctor(contractID, interfaceName, initializer)
  {
    return Components.Constructor(contractID, interfaceName, initializer);
  }

  BIS = ctor("@mozilla.org/binaryinputstream;1",
             "nsIBinaryInputStream",
             "setInputStream");
  BOS = ctor("@mozilla.org/binaryoutputstream;1",
             "nsIBinaryOutputStream",
             "setOutputStream");
  _Pipe = ctor("@mozilla.org/pipe;1",
               "nsIPipe",
               "init");
  COS = ctor("@mozilla.org/intl/converter-output-stream;1",
             "nsIConverterOutputStream",
             "init");
  FIS = ctor("@mozilla.org/network/file-input-stream;1",
             "nsIFileInputStream",
             "init");
  _SS = ctor("@mozilla.org/storagestream;1",
             "nsIStorageStream",
             "init");
  CIS = ctor("@mozilla.org/intl/converter-input-stream;1",
             "nsIConverterInputStream",
             "init");

  dataDir = do_get_file("data/");

  // Order matters only for reporting: the first failing test aborts the rest.
  var tests = [
    test_utf8_1,
    test_utf16_1,
    test_utf16_2,
    test_utf16_3,
    test_cross_conversion
  ];
  tests.forEach(function (test) { test(); });
}
// Sample strings that the test_utf* functions push through the converter
// output stream and then decode again.
const UNICODE_STRINGS = [
  // ASCII mixed with characters from the U+0080..U+00FF range
  '\u00BD + \u00BE == \u00BD\u00B2 + \u00BC + \u00BE',

  // one string sampling each of the one-, two- and three-byte UTF-8 ranges
  [
    'AZaz09 \u007F ',                // U+000000 to U+00007F
    '\u0080 \u0398 \u03BB \u0725 ',  // U+000080 to U+0007FF
    '\u0964 \u0F5F \u20AC \uFFFB'    // U+000800 to U+00FFFF
  ].join('')

  // No strings with non-BMP code points are listed: JS strings are made of
  // 16-bit units and String.prototype.charCodeAt() only ever returns one such
  // unit, so the comparison helpers here cannot express code points above
  // U+FFFF.
];
// Data files used by test_cross_conversion().  Every file holds the same
// Unicode text; each key is a file name and each value is that file's
// encoding, spelled the way nsIConverter(In|Out)putStream.init expects it.
const UNICODE_FILES = {
  "unicode-conversion.utf8.txt":    "UTF-8",
  "unicode-conversion.utf16.txt":   "UTF-16",
  "unicode-conversion.utf16le.txt": "UTF-16LE",
  "unicode-conversion.utf16be.txt": "UTF-16BE"
};
/**
 * Writes every sample string through a UTF-8 converter output stream into a
 * pipe and checks that decoding the pipe's bytes with the UTF8 reader yields
 * the string's code points again.
 */
function test_utf8_1()
{
  UNICODE_STRINGS.forEach(function (text, index)
  {
    var pipe = Pipe();
    var writer = new COS(pipe.outputStream, "UTF-8", 1024, 0x0);
    do_check_true(writer.writeString(text));
    writer.close();

    var decoded = new UTF8(pipe.inputStream);
    var expected = stringToCodePoints(text);
    if (!equal(decoded, expected))
      do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
/**
 * Writes every sample string through a "UTF-16" converter output stream into
 * a pipe and checks the result with a UTF16 reader that is given no
 * endianness, so the reader must pick it up from the byte-order mark.
 */
function test_utf16_1()
{
  UNICODE_STRINGS.forEach(function (text, index)
  {
    var pipe = Pipe();
    var writer = new COS(pipe.outputStream, "UTF-16", 1024, 0x0);
    do_check_true(writer.writeString(text));
    writer.close();

    // exactly one argument: the reader distinguishes "omitted" from false
    var decoded = new UTF16(pipe.inputStream);
    var expected = stringToCodePoints(text);
    if (!equal(decoded, expected))
      do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
/**
 * Writes every sample string through a "UTF-16LE" converter output stream
 * into a pipe and checks the result with a UTF16 reader told explicitly to
 * read little-endian data.
 */
function test_utf16_2()
{
  UNICODE_STRINGS.forEach(function (text, index)
  {
    var pipe = Pipe();
    var writer = new COS(pipe.outputStream, "UTF-16LE", 1024, 0x0);
    do_check_true(writer.writeString(text));
    writer.close();

    var decoded = new UTF16(pipe.inputStream, false);
    var expected = stringToCodePoints(text);
    if (!equal(decoded, expected))
      do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
/**
 * Writes every sample string through a "UTF-16BE" converter output stream
 * into a pipe and checks the result with a UTF16 reader told explicitly to
 * read big-endian data.
 */
function test_utf16_3()
{
  UNICODE_STRINGS.forEach(function (text, index)
  {
    var pipe = Pipe();
    var writer = new COS(pipe.outputStream, "UTF-16BE", 1024, 0x0);
    do_check_true(writer.writeString(text));
    writer.close();

    var decoded = new UTF16(pipe.inputStream, true);
    var expected = stringToCodePoints(text);
    if (!equal(decoded, expected))
      do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
/**
 * For every ordered pair of files in UNICODE_FILES, decodes the raw bytes of
 * the first file using the first file's encoding and checks that the
 * resulting characters equal those read from the second file decoded with
 * its own encoding.
 */
function test_cross_conversion()
{
  // Copies all bytes of the named data file into a new storage stream and
  // returns that storage stream; both transfer streams are closed afterwards.
  function loadIntoStorage(filename)
  {
    var source = getBinaryInputStream(filename);
    var storage = StorageStream();
    var sink = new BOS(storage.getOutputStream(0));

    for (var pending = source.available();
         pending > 0;
         pending = source.available())
    {
      var bytes = source.readByteArray(pending);
      sink.writeByteArray(bytes, bytes.length);
    }

    source.close();
    sink.close();
    return storage;
  }

  for (var sourceName in UNICODE_FILES)
  {
    var sourceCharset = UNICODE_FILES[sourceName];
    var storage = loadIntoStorage(sourceName);

    for (var otherName in UNICODE_FILES)
    {
      var otherCharset = UNICODE_FILES[otherName];
      var expected = getUnicharInputStream(otherName, otherCharset);
      // a fresh input stream positioned at the start of the stored bytes
      var actual = new CIS(storage.newInputStream(0),
                           sourceCharset, 8192, 0x0);

      if (!equalUnicharStreams(actual, expected))
        do_throw("unequal streams: " + sourceCharset + ", " + otherCharset);
    }
  }
}
163 // utility functions
/**
 * Creates a new, empty storage stream via the _SS constructor.
 *
 * @returns
 *   the initialized storage stream
 */
function StorageStream()
{
  // arguments follow nsIStorageStream.init: segment size, maximum size, and
  // a null segment allocator
  var segmentSize = 8192;
  var maxSize = 0xFFFFFFFF;  // 2^32 - 1
  return new _SS(segmentSize, maxSize, null);
}
/**
 * Opens a file from the test data directory as a stream of characters.
 *
 * @param filename
 *   leaf name of the file, appended to a clone of dataDir
 * @param encoding
 *   charset name handed to the converter input stream
 * @returns
 *   a converter input stream (CIS) wrapping a read-only file input stream
 *   that was opened with CLOSE_ON_EOF
 */
function getUnicharInputStream(filename, encoding)
{
  var file = dataDir.clone();
  file.append(filename);

  const PR_RDONLY = 0x1;
  // 0o644 is the standard spelling of the legacy octal literal 0644, which is
  // a SyntaxError in strict-mode code and in modules.
  var fis = new FIS(file, PR_RDONLY, 0o644, Ci.nsIFileInputStream.CLOSE_ON_EOF);
  return new CIS(fis, encoding, 8192, 0x0);
}
/**
 * Opens a file from the test data directory as a stream of raw bytes.
 *
 * @param filename
 *   leaf name of the file, appended to a clone of dataDir
 * @param encoding
 *   unused; kept so the signature parallels getUnicharInputStream
 * @returns
 *   a binary input stream (BIS) wrapping a read-only file input stream that
 *   was opened with CLOSE_ON_EOF
 */
function getBinaryInputStream(filename, encoding)
{
  var file = dataDir.clone();
  file.append(filename);

  const PR_RDONLY = 0x1;
  // 0o644 is the standard spelling of the legacy octal literal 0644, which is
  // a SyntaxError in strict-mode code and in modules.
  var fis = new FIS(file, PR_RDONLY, 0o644, Ci.nsIFileInputStream.CLOSE_ON_EOF);
  return new BIS(fis);
}
/**
 * Checks that a reader produces exactly the given sequence of values.
 *
 * @param stream
 *   object whose readUnit() returns the next value, or a negative number
 *   once the end of the data is reached
 * @param codePoints
 *   array of the values expected from the stream, in order
 * @returns
 *   true if every value read matches the array element at the same position
 *   and the stream ends exactly when the array does, false otherwise
 */
function equal(stream, codePoints)
{
  var currIndex = 0;
  while (true)
  {
    var unit = stream.readUnit();
    if (unit < 0)
      return currIndex === codePoints.length;  // both must be exhausted
    // reading past the end of the array compares against undefined and fails
    if (unit !== codePoints[currIndex++])
      return false;
  }
}
/**
 * Reads two character streams in lock-step, up to 1024 characters per read,
 * and reports whether they deliver identical data.
 *
 * @param s1
 *   first stream; must offer readString(count, outparam)
 * @param s2
 *   second stream, same interface
 * @returns
 *   true once both streams report a zero-length read with everything before
 *   it equal; false (after printing the two read counts and chunk lengths)
 *   as soon as a pair of reads differs in count or content
 */
function equalUnicharStreams(s1, s2)
{
  var chunk1 = {};
  var chunk2 = {};

  for (;;)
  {
    var count1 = s1.readString(1024, chunk1);
    var count2 = s2.readString(1024, chunk2);

    var sameChunk = count1 == count2 && chunk1.value == chunk2.value;
    if (!sameChunk)
    {
      print("r1: " + count1 + ", r2: " + count2);
      print(chunk1.value.length);
      print(chunk2.value.length);
      return false;
    }

    if (count1 == 0 && count2 == 0)
      return true;
  }
}
/**
 * Converts a string to the array of Unicode code points it contains.
 *
 * A surrogate pair yields one value above 0xFFFF rather than two 16-bit code
 * units, matching what the readUnit() methods of the charset readers return.
 * A surrogate that is not part of a pair is returned as its own value.
 *
 * @param str
 *   the string to convert
 * @returns
 *   array of numeric code points, in string order
 */
function stringToCodePoints(str)
{
  var codePoints = [];
  // string iteration steps by code point, keeping surrogate pairs together
  for (var ch of str)
    codePoints.push(ch.codePointAt(0));
  return codePoints;
}
/**
 * Builds a mask with the n lowest bits set.
 *
 * @param n
 *   number of low bits to set
 * @returns
 *   2^n - 1
 */
function lowbits(n)
{
  var firstBitAbove = Math.pow(2, n);
  return firstBitAbove - 1;
}
/**
 * Creates a new pipe via the _Pipe constructor.
 *
 * @returns
 *   the initialized pipe
 */
function Pipe()
{
  // arguments follow nsIPipe.init: non-blocking input, non-blocking output,
  // segment size, segment count, and a null segment allocator
  var nonBlockingInput = false;
  var nonBlockingOutput = false;
  return new _Pipe(nonBlockingInput, nonBlockingOutput, 1024, 10, null);
}
246 // complex charset readers
/**
 * Reader that decodes a UTF-8 byte stream one Unicode code point at a time.
 *
 * @param stream
 *   the input stream to decode; it is wrapped in a binary input stream
 */
function UTF8(stream)
{
  this._stream = new BIS(stream);
}
UTF8.prototype =
{
  /**
   * Decodes the code point at the front of the stream.
   *
   * @returns
   *   the numeric code point, or -1 if no byte at all could be read
   * @throws NS_ERROR_ILLEGAL_VALUE
   *   if the bytes are not a valid, shortest-form UTF-8 sequence; a failure
   *   to read a byte after the first one propagates whatever the underlying
   *   stream throws
   */
  readUnit: function()
  {
    var input = this._stream;

    // Reads one trailing byte, insisting on the 10xxxxxx form, and returns
    // its six payload bits.
    function continuation()
    {
      var b = input.read8();
      if (b < 0x80 || b >= 0xC0)
        throw NS_ERROR_ILLEGAL_VALUE;
      return b & 0x3F;
    }

    // Only a failure on the very first byte means "end of stream"; a failure
    // later in the sequence is a truncated character and must not be hidden.
    var lead;
    try
    {
      lead = input.read8();
    }
    catch (e)
    {
      return -1;
    }

    // 0xxxxxxx: single byte, U+000000 to U+00007F
    if (lead < 0x80)
      return lead;

    // 10xxxxxx: a trailing byte cannot start a character
    if (lead < 0xC0)
      throw NS_ERROR_ILLEGAL_VALUE;

    var codePoint;

    var t1 = continuation();
    if (lead < 0xE0)
    {
      // 110xxxxx: two bytes, U+000080 to U+0007FF
      codePoint = ((lead & 0x1F) << 6) + t1;
      if (codePoint < 0x80)
        throw NS_ERROR_ILLEGAL_VALUE;  // overlong form
      return codePoint;
    }

    var t2 = continuation();
    if (lead < 0xF0)
    {
      // 1110xxxx: three bytes, U+000800 to U+00FFFF minus the surrogates
      codePoint = ((lead & 0x0F) << 12) + (t1 << 6) + t2;
      var isSurrogate = codePoint >= 0xD800 && codePoint <= 0xDFFF;
      if (codePoint < 0x800 || isSurrogate)
        throw NS_ERROR_ILLEGAL_VALUE;
      return codePoint;
    }

    var t3 = continuation();
    if (lead < 0xF8)
    {
      // 11110xxx: four bytes, U+010000 to U+10FFFF; the upper limit needs an
      // explicit test because 0x10FFFF is not of the form 2**n - 1
      codePoint = ((lead & 0x07) << 18) + (t1 << 12) + (t2 << 6) + t3;
      if (codePoint < 0x10000 || codePoint > 0x10FFFF)
        throw NS_ERROR_ILLEGAL_VALUE;
      return codePoint;
    }

    // 11111xxx: no UTF-8 mapping
    throw NS_ERROR_ILLEGAL_VALUE;
  }
};
/**
 * Wraps a UTF-16 stream to allow access to the Unicode code points in it.
 *
 * @param stream
 *   the stream to wrap
 * @param bigEndian
 *   true for UTF-16BE, false for UTF-16LE; when not passed at all, the first
 *   16-bit value of the stream must be a byte-order mark and determines the
 *   byte order
 */
function UTF16(stream, bigEndian)
{
  this._stream = new BIS(stream);
  if (arguments.length > 1)
  {
    this._bigEndian = bigEndian;
  }
  else
  {
    // the mark reads as 0xFEFF when the data is big-endian and as 0xFFFE
    // when it is little-endian
    var bom = this._stream.read16();
    if (bom == 0xFEFF)
      this._bigEndian = true;
    else if (bom == 0xFFFE)
      this._bigEndian = false;
    else
      do_throw("missing BOM: " + bom.toString(16).toUpperCase());
  }
}
UTF16.prototype =
{
  /**
   * Decodes the code point at the front of the stream.
   *
   * @returns
   *   the numeric code point, or -1 if no byte at all could be read
   * @throws NS_ERROR_ILLEGAL_VALUE
   *   if a low surrogate appears where none is expected, or a high surrogate
   *   is not followed by a low surrogate; a failure to read a byte after the
   *   first one propagates whatever the underlying stream throws
   */
  readUnit: function()
  {
    var str = this._stream;
    var bigEndian = this._bigEndian;

    // Assembles a 16-bit code unit from two bytes in stream order.
    function toCodeUnit(first, second)
    {
      return bigEndian
           ? (first << 8) + second
           : (second << 8) + first;
    }

    // if at end of stream, must distinguish failure to read any bytes
    // (correct behavior) from failure to read some byte after the first
    // in the character
    var b1;
    try
    {
      b1 = str.read8();
    }
    catch (e)
    {
      return -1;
    }

    var w1 = toCodeUnit(b1, str.read8());

    if (w1 > 0xDBFF && w1 < 0xE000)
    {
      // low (second) surrogate, but expecting none or a high (first) one
      throw NS_ERROR_ILLEGAL_VALUE;
    }

    if (w1 > 0xD7FF && w1 < 0xDC00)
    {
      // non-BMP: w1 is a high surrogate and must be followed by a low one
      b1 = str.read8();
      var w2 = toCodeUnit(b1, str.read8());
      if (w2 < 0xDC00 || w2 > 0xDFFF)
        throw NS_ERROR_ILLEGAL_VALUE;

      // The high surrogate carries the upper ten bits and the low surrogate
      // the lower ten bits of (code point - 0x10000).  The result is at most
      // 0x10000 + 0xFFC00 + 0x3FF == 0x10FFFF, so no range check is needed.
      return 0x10000 +
             ((lowbits(10) & w1) << 10) +
             (lowbits(10) & w2);
    }

    // non-surrogate
    return w1;
  }
};