Tue, 06 Jan 2015 21:39:09 +0100
Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance requirements documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.
// Shorthand handles for the XPCOM class and interface tables.
const Cc = Components.classes;
const Ci = Components.interfaces;

// Value thrown by the hand-written decoders in this file when the bytes they
// read do not form a valid character in the expected encoding.
const NS_ERROR_ILLEGAL_VALUE = Components.results.NS_ERROR_ILLEGAL_VALUE;

// Stream constructors; declared here and assigned inside run_test().
var BIS;
var BOS;
var _Pipe;
var COS;
var FIS;
var _SS;
var CIS;

// Directory holding the test data files; assigned inside run_test().
var dataDir;
michael@0 | 9 | |
/**
 * Test entry point (presumably invoked by the test harness): builds the
 * stream constructors used throughout the file, locates the data directory,
 * and then runs each conversion test in turn.
 */
function run_test()
{
  // Thin wrapper so every constructor below reads as one
  // (contract ID, interface name, initializer method) triple.
  function makeConstructor(contractID, interfaceName, initializer)
  {
    return Components.Constructor(contractID, interfaceName, initializer);
  }

  // binary (byte-level) streams
  BIS = makeConstructor("@mozilla.org/binaryinputstream;1",
                        "nsIBinaryInputStream",
                        "setInputStream");
  BOS = makeConstructor("@mozilla.org/binaryoutputstream;1",
                        "nsIBinaryOutputStream",
                        "setOutputStream");

  // in-memory plumbing
  _Pipe = makeConstructor("@mozilla.org/pipe;1",
                          "nsIPipe",
                          "init");
  _SS = makeConstructor("@mozilla.org/storagestream;1",
                        "nsIStorageStream",
                        "init");

  // charset converters
  COS = makeConstructor("@mozilla.org/intl/converter-output-stream;1",
                        "nsIConverterOutputStream",
                        "init");
  CIS = makeConstructor("@mozilla.org/intl/converter-input-stream;1",
                        "nsIConverterInputStream",
                        "init");

  // file access
  FIS = makeConstructor("@mozilla.org/network/file-input-stream;1",
                        "nsIFileInputStream",
                        "init");

  dataDir = do_get_file("data/");

  test_utf8_1();
  test_utf16_1();
  test_utf16_2();
  test_utf16_3();
  test_cross_conversion();
}
michael@0 | 42 | |
// Sample strings pushed through the converter output streams by the
// test_utf8_* and test_utf16_* functions.
const UNICODE_STRINGS =
  [
    // Latin-1 supplement characters mixed with ASCII
    "\u00BD + \u00BE == \u00BD\u00B2 + \u00BC + \u00BE",

    // samples from each range of the Basic Multilingual Plane
    [
      "AZaz09 \u007F ",                // U+000000 to U+00007F
      "\u0080 \u0398 \u03BB \u0725 ",  // U+000080 to U+0007FF
      "\u0964 \u0F5F \u20AC \uFFFB"    // U+000800 to U+00FFFF
    ].join("")

    // No strings with non-BMP code points are listed: JS strings are
    // sequences of 16-bit values, and String.prototype.charCodeAt() returns
    // those individual 16-bit values rather than combined surrogate pairs, so
    // working with non-BMP characters here would take extra gymnastics.
  ];
michael@0 | 58 | |
// Conversion-equality fixtures. Each key is the name of a data file; all the
// files contain equivalent Unicode data. Each value is that file's encoding,
// spelled the way nsIConverter(In|Out)putStream.init expects it.
const UNICODE_FILES =
  {
    "unicode-conversion.utf8.txt":    "UTF-8",
    "unicode-conversion.utf16.txt":   "UTF-16",
    "unicode-conversion.utf16le.txt": "UTF-16LE",
    "unicode-conversion.utf16be.txt": "UTF-16BE"
  };
michael@0 | 69 | |
/**
 * Writes each UNICODE_STRINGS entry through a "UTF-8" converter output stream
 * into a pipe, then decodes the pipe's bytes with the hand-written UTF8
 * reader and checks that the units read match the source string.
 */
function test_utf8_1()
{
  UNICODE_STRINGS.forEach(function(text, index)
  {
    var channel = Pipe();
    var encoder = new COS(channel.outputStream, "UTF-8", 1024, 0x0);
    do_check_true(encoder.writeString(text));
    encoder.close();

    var decoded = new UTF8(channel.inputStream);
    var expected = stringToCodePoints(text);
    if (equal(decoded, expected))
      return;

    do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
michael@0 | 84 | |
/**
 * Writes each UNICODE_STRINGS entry through a "UTF-16" converter output
 * stream into a pipe, then decodes the bytes with the hand-written UTF16
 * reader. No endianness is passed to the reader, so it takes the byte order
 * from the byte-order mark at the front of the stream.
 */
function test_utf16_1()
{
  UNICODE_STRINGS.forEach(function(text, index)
  {
    var channel = Pipe();
    var encoder = new COS(channel.outputStream, "UTF-16", 1024, 0x0);
    do_check_true(encoder.writeString(text));
    encoder.close();

    var decoded = new UTF16(channel.inputStream);
    var expected = stringToCodePoints(text);
    if (equal(decoded, expected))
      return;

    do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
michael@0 | 99 | |
/**
 * Writes each UNICODE_STRINGS entry through a "UTF-16LE" converter output
 * stream into a pipe, then decodes the bytes with the hand-written UTF16
 * reader in explicit little-endian mode (bigEndian === false).
 */
function test_utf16_2()
{
  UNICODE_STRINGS.forEach(function(text, index)
  {
    var channel = Pipe();
    var encoder = new COS(channel.outputStream, "UTF-16LE", 1024, 0x0);
    do_check_true(encoder.writeString(text));
    encoder.close();

    var decoded = new UTF16(channel.inputStream, false);
    var expected = stringToCodePoints(text);
    if (equal(decoded, expected))
      return;

    do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
michael@0 | 114 | |
/**
 * Writes each UNICODE_STRINGS entry through a "UTF-16BE" converter output
 * stream into a pipe, then decodes the bytes with the hand-written UTF16
 * reader in explicit big-endian mode (bigEndian === true).
 */
function test_utf16_3()
{
  UNICODE_STRINGS.forEach(function(text, index)
  {
    var channel = Pipe();
    var encoder = new COS(channel.outputStream, "UTF-16BE", 1024, 0x0);
    do_check_true(encoder.writeString(text));
    encoder.close();

    var decoded = new UTF16(channel.inputStream, true);
    var expected = stringToCodePoints(text);
    if (equal(decoded, expected))
      return;

    do_throw("UNICODE_STRINGS[" + index + "] not handled correctly");
  });
}
michael@0 | 129 | |
michael@0 | 130 | |
/**
 * For every file in UNICODE_FILES, copies its raw bytes into an in-memory
 * storage stream, decodes that copy using the file's own encoding, and checks
 * the decoded text against every file in UNICODE_FILES (each decoded with its
 * own encoding). Throws via do_throw on the first pair that differs.
 */
function test_cross_conversion()
{
  for (var sourceName in UNICODE_FILES)
  {
    // Copy the source file byte-for-byte into a storage stream so it can be
    // re-read from the start once per comparison below.
    var rawInput = getBinaryInputStream(sourceName);
    var storage = StorageStream();
    var rawOutput = new BOS(storage.getOutputStream(0));

    for (var pending = rawInput.available();
         pending > 0;
         pending = rawInput.available())
    {
      var bytes = rawInput.readByteArray(pending);
      rawOutput.writeByteArray(bytes, bytes.length);
    }
    rawInput.close();
    rawOutput.close();

    for (var referenceName in UNICODE_FILES)
    {
      var reference = getUnicharInputStream(referenceName,
                                            UNICODE_FILES[referenceName]);
      var decodedCopy = new CIS(storage.newInputStream(0),
                                UNICODE_FILES[sourceName], 8192, 0x0);

      if (equalUnicharStreams(decodedCopy, reference))
        continue;

      do_throw("unequal streams: " +
               UNICODE_FILES[sourceName] + ", " +
               UNICODE_FILES[referenceName]);
    }
  }
}
michael@0 | 161 | |
michael@0 | 162 | |
michael@0 | 163 | // utility functions |
michael@0 | 164 | |
/**
 * Creates a fresh storage stream via the _SS constructor.
 *
 * @returns the newly initialized storage stream
 */
function StorageStream()
{
  // NOTE(review): the first two init arguments look like a segment size and a
  // maximum size -- confirm against nsIStorageStream.init.
  var firstArg = 8192;
  var secondArg = 0xFFFFFFFF; // 2^32 - 1
  return new _SS(firstArg, secondArg, null);
}
michael@0 | 169 | |
/**
 * Opens a file from the test data directory as a stream of decoded
 * characters.
 *
 * @param filename
 *   name of the file, relative to dataDir
 * @param encoding
 *   charset name handed to the converter input stream
 * @returns a converter input stream (CIS) wrapping a read-only file input
 *   stream on the file
 */
function getUnicharInputStream(filename, encoding)
{
  var file = dataDir.clone();
  file.append(filename);

  const PR_RDONLY = 0x1;
  // 0o644 replaces the legacy octal literal 0644, which is a SyntaxError in
  // strict-mode code; the numeric value (420) is unchanged.
  var fis = new FIS(file, PR_RDONLY, 0o644, Ci.nsIFileInputStream.CLOSE_ON_EOF);
  return new CIS(fis, encoding, 8192, 0x0);
}
michael@0 | 179 | |
/**
 * Opens a file from the test data directory as a stream of raw bytes.
 *
 * @param filename
 *   name of the file, relative to dataDir
 * @param encoding
 *   unused; kept so the signature parallels getUnicharInputStream
 * @returns a binary input stream (BIS) wrapping a read-only file input stream
 *   on the file
 */
function getBinaryInputStream(filename, encoding)
{
  var file = dataDir.clone();
  file.append(filename);

  const PR_RDONLY = 0x1;
  // 0o644 replaces the legacy octal literal 0644, which is a SyntaxError in
  // strict-mode code; the numeric value (420) is unchanged.
  var fis = new FIS(file, PR_RDONLY, 0o644, Ci.nsIFileInputStream.CLOSE_ON_EOF);
  return new BIS(fis);
}
michael@0 | 189 | |
/**
 * Checks that a reader yields exactly the given sequence of values.
 *
 * @param stream
 *   object with a readUnit() method that returns the next value, or a
 *   negative number once the stream is exhausted
 * @param codePoints
 *   array of the values expected, in order
 * @returns true if every unit read matches the corresponding array entry and
 *   the stream ends exactly when the array does; false otherwise
 */
function equal(stream, codePoints)
{
  var currIndex = 0;
  while (true)
  {
    var unit = stream.readUnit();
    if (unit < 0)
      return currIndex == codePoints.length;

    // If the stream outlasts the array, the array read yields undefined,
    // which never strictly equals a unit, so this also returns false.
    if (unit !== codePoints[currIndex++])
      return false;
  }
}
michael@0 | 205 | |
/**
 * Compares the full character content of two unichar input streams.
 *
 * The comparison does not depend on how each stream splits its data into
 * chunks: text from each side is buffered and only the overlapping prefix is
 * compared on each pass, so two streams with the same content compare equal
 * even if readString hands back differently sized pieces.
 *
 * @param s1
 *   first stream; must provide readString(count, outparam) that stores the
 *   text read in outparam.value and returns the number of characters read,
 *   with 0 meaning end of stream
 * @param s2
 *   second stream, same contract as s1
 * @returns true if both streams yield the same characters and end together,
 *   false otherwise (after printing the buffered lengths as a diagnostic)
 */
function equalUnicharStreams(s1, s2)
{
  var CHUNK = 1024;
  var out1 = {}, out2 = {};
  var buf1 = "", buf2 = "";
  var done1 = false, done2 = false;

  while (true)
  {
    // Refill whichever side has nothing buffered; a zero count marks EOF.
    if (buf1.length === 0 && !done1)
    {
      if (s1.readString(CHUNK, out1) === 0)
        done1 = true;
      else
        buf1 = out1.value;
    }
    if (buf2.length === 0 && !done2)
    {
      if (s2.readString(CHUNK, out2) === 0)
        done2 = true;
      else
        buf2 = out2.value;
    }

    // A side is only marked done while its buffer is empty, so both done
    // means all content was consumed and matched.
    if (done1 && done2)
      return true;

    if (done1 || done2)
    {
      // One stream ended: any text still buffered on the other is a mismatch.
      var leftover = done1 ? buf2 : buf1;
      if (leftover.length > 0)
      {
        print("stream ended early; buffered: " +
              buf1.length + ", " + buf2.length);
        return false;
      }
      // The live side reported characters but stored none; read it again.
      continue;
    }

    var common = Math.min(buf1.length, buf2.length);
    if (buf1.substring(0, common) !== buf2.substring(0, common))
    {
      print("stream content differs; buffered: " +
            buf1.length + ", " + buf2.length);
      return false;
    }
    buf1 = buf1.substring(common);
    buf2 = buf2.substring(common);
  }
}
michael@0 | 229 | |
/**
 * Converts a JS string into an array of Unicode code points.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * the single non-BMP code point the pair encodes, which is what the UTF8 and
 * UTF16 readers in this file return for such characters. Every other 16-bit
 * unit, including an unpaired surrogate, is passed through unchanged, so
 * BMP-only strings give the same result as a plain charCodeAt() per unit.
 *
 * @param str
 *   the string to convert
 * @returns array of numeric code points, in string order
 */
function stringToCodePoints(str)
{
  var codePoints = [];
  for (var i = 0; i < str.length; i++)
  {
    var unit = str.charCodeAt(i);
    if (unit >= 0xD800 && unit <= 0xDBFF && i + 1 < str.length)
    {
      var next = str.charCodeAt(i + 1);
      if (next >= 0xDC00 && next <= 0xDFFF)
      {
        codePoints.push(0x10000 + ((unit - 0xD800) << 10) + (next - 0xDC00));
        i++; // the low surrogate has been consumed too
        continue;
      }
    }
    codePoints.push(unit);
  }
  return codePoints;
}
michael@0 | 234 | |
/**
 * Builds a mask with the n lowest bits set.
 *
 * @param n
 *   number of low bits to set
 * @returns 2^n - 1
 */
function lowbits(n)
{
  var powerOfTwo = Math.pow(2, n);
  return powerOfTwo - 1;
}
michael@0 | 239 | |
/**
 * Creates a fresh pipe via the _Pipe constructor.
 *
 * @returns the newly initialized pipe; callers in this file use its
 *   outputStream and inputStream properties
 */
function Pipe()
{
  // NOTE(review): the arguments look like (non-blocking input, non-blocking
  // output, segment size, segment count, allocator) -- confirm against
  // nsIPipe.init.
  var initArgs = [false, false, 1024, 10, null];
  return new _Pipe(initArgs[0], initArgs[1], initArgs[2], initArgs[3],
                   initArgs[4]);
}
michael@0 | 244 | |
michael@0 | 245 | |
michael@0 | 246 | // complex charset readers |
michael@0 | 247 | |
/**
 * Hand-written UTF-8 decoder: wraps a byte stream and hands back the Unicode
 * code points encoded in it, one per readUnit() call.
 *
 * @param stream
 *   the stream to wrap
 */
function UTF8(stream)
{
  this._stream = new BIS(stream);
}
UTF8.prototype =
{
  /**
   * Decodes the next character.
   *
   * @returns the numeric code point at the front of the stream, or -1 if the
   *   stream is already at its end
   * @throws NS_ERROR_ILLEGAL_VALUE if the bytes are not a valid, properly
   *   (shortest-form) encoded code point; a failure to read a byte after the
   *   first one propagates whatever the underlying read8() threw
   */
  readUnit: function()
  {
    var input = this._stream;

    // Reads one trailing byte, insists on the 10xxxxxx pattern, and returns
    // its six payload bits.
    function continuation()
    {
      var b = input.read8();
      if (b < 0x80 || b >= 0xC0)
        throw NS_ERROR_ILLEGAL_VALUE;
      return b & 0x3F;
    }

    // Only a failure on the very first byte counts as a clean end of stream;
    // later read failures mean a truncated character and are not caught.
    var lead;
    try
    {
      lead = input.read8();
    }
    catch (e)
    {
      return -1;
    }

    // 0xxxxxxx: single-byte character
    if (lead < 0x80)
      return lead;

    // 10xxxxxx: a continuation byte cannot start a character
    if (lead < 0xC0)
      throw NS_ERROR_ILLEGAL_VALUE;

    var bits2 = continuation();
    if (lead < 0xE0)
    {
      // 110xxxxx: two bytes, U+000080 to U+0007FF
      var twoByte = ((lead & 0x1F) << 6) + bits2;
      if (twoByte < 0x80)
        throw NS_ERROR_ILLEGAL_VALUE; // overlong encoding
      return twoByte;
    }

    var bits3 = continuation();
    if (lead < 0xF0)
    {
      // 1110xxxx: three bytes, U+000800 to U+00FFFF minus the surrogate range
      var threeByte = ((lead & 0x0F) << 12) + (bits2 << 6) + bits3;
      var belowSurrogates = threeByte >= 0x800 && threeByte <= 0xD7FF;
      var aboveSurrogates = threeByte >= 0xE000;
      if (!aboveSurrogates && !belowSurrogates)
        throw NS_ERROR_ILLEGAL_VALUE;
      return threeByte;
    }

    var bits4 = continuation();
    if (lead < 0xF8)
    {
      // 11110xxx: four bytes, U+010000 to U+10FFFF; the upper bound needs an
      // explicit check because 0x10FFFF is not of the form 2**n - 1
      var fourByte = ((lead & 0x07) << 18) + (bits2 << 12) + (bits3 << 6) +
                     bits4;
      if (fourByte < 0x10000 || fourByte > 0x10FFFF)
        throw NS_ERROR_ILLEGAL_VALUE;
      return fourByte;
    }

    // 11111000 or greater -- no UTF-8 mapping
    throw NS_ERROR_ILLEGAL_VALUE;
  }
};
michael@0 | 346 | |
/**
 * Wraps a UTF-16 stream to allow access to the Unicode code points in it.
 *
 * @param stream
 *   the stream to wrap
 * @param bigEndian
 *   true for UTF-16BE, false for UTF-16LE, not present at all for UTF-16 with
 *   a byte-order mark (the mark is then consumed here to pick the byte order)
 */
function UTF16(stream, bigEndian)
{
  this._stream = new BIS(stream);
  if (arguments.length > 1)
  {
    this._bigEndian = bigEndian;
  }
  else
  {
    // No explicit byte order: take it from the first 16-bit value returned by
    // read16(), which must be a byte-order mark.
    var bom = this._stream.read16();
    if (bom == 0xFEFF)
      this._bigEndian = true;
    else if (bom == 0xFFFE)
      this._bigEndian = false;
    else
      do_throw("missing BOM: " + bom.toString(16).toUpperCase());
  }
}
UTF16.prototype =
{
  /**
   * Decodes the next character.
   *
   * @returns the numeric code point at the front of the stream, or -1 if the
   *   stream is already at its end
   * @throws NS_ERROR_ILLEGAL_VALUE on a low surrogate with no preceding high
   *   surrogate, or a high surrogate not followed by a low surrogate; a
   *   failure to read a byte after the first one propagates whatever the
   *   underlying read8() threw
   */
  readUnit: function()
  {
    var str = this._stream;
    var bigEndian = this._bigEndian;

    // Assembles one 16-bit code unit from two bytes in stream order.
    function toWord(first, second)
    {
      return bigEndian
           ? (first << 8) + second
           : (second << 8) + first;
    }

    // if at end of stream, must distinguish failure to read any bytes
    // (correct behavior) from failure to read some byte after the first
    // in the character
    var b1;
    try
    {
      b1 = str.read8();
    }
    catch (e)
    {
      return -1;
    }

    var w1 = toWord(b1, str.read8());

    if (w1 > 0xDBFF && w1 < 0xE000)
    {
      // low (second) surrogate, but expecting none or a high (first) one
      throw NS_ERROR_ILLEGAL_VALUE;
    }

    if (w1 > 0xD7FF && w1 < 0xDC00)
    {
      // high surrogate: a non-BMP code point, completed by the next unit
      b1 = str.read8();
      var w2 = toWord(b1, str.read8());
      if (w2 < 0xDC00 || w2 > 0xDFFF)
        throw NS_ERROR_ILLEGAL_VALUE;

      // The high surrogate carries the upper ten bits and the low surrogate
      // the lower ten, offset by 0x10000. The result is at most 0x10FFFF by
      // construction, so no upper bounds-check is needed.
      return 0x10000 +
             ((lowbits(10) & w1) << 10) +
             (lowbits(10) & w2);
    }

    // non-surrogate
    return w1;
  }
};