The Tor Browser: dom/encoding/test/unit/test

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

     1 // NOTE: Requires testharness.js

     2 // http://www.w3.org/2008/webapps/wiki/Harness

     4 test(

     5   function() {

     6     var badStrings = [

     7       { input: '\ud800', expected: '\ufffd' }, // Surrogate half

     8       { input: '\udc00', expected: '\ufffd' }, // Surrogate half

     9       { input: 'abc\ud800def', expected: 'abc\ufffddef' }, // Surrogate half

    10       { input: 'abc\udc00def', expected: 'abc\ufffddef' }, // Surrogate half

    11       { input: '\udc00\ud800', expected: '\ufffd\ufffd' } // Wrong order

    12     ];

    14     badStrings.forEach(

    15       function(t) {

    16         var encoded = new TextEncoder('utf-8').encode(t.input);

    17         var decoded = new TextDecoder('utf-8').decode(encoded);

    18         assert_equals(t.expected, decoded);

    19       });

    20   },

    21   "bad data"

    22 );

    24 test(

    25   function() {

    26     var bad = [

    27       { encoding: 'utf-8', input: [0xC0] }, // ends early

    28       { encoding: 'utf-8', input: [0xC0, 0x00] }, // invalid trail

    29       { encoding: 'utf-8', input: [0xC0, 0xC0] }, // invalid trail

    30       { encoding: 'utf-8', input: [0xE0] }, // ends early

    31       { encoding: 'utf-8', input: [0xE0, 0x00] }, // invalid trail

    32       { encoding: 'utf-8', input: [0xE0, 0xC0] }, // invalid trail

    33       { encoding: 'utf-8', input: [0xE0, 0x80, 0x00] }, // invalid trail

    34       { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0] }, // invalid trail

    35       { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF

    36       { encoding: 'utf-16le', input: [0x00] }, // truncated code unit

    37       { encoding: 'utf-16le', input: [0x00, 0xd8] }, // surrogate half

    38       { encoding: 'utf-16le', input: [0x00, 0xd8, 0x00, 0x00] }, // surrogate half

    39       { encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0x00] }, // trail surrogate

    40       { encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0xd8] }  // swapped surrogates

    41       // TODO: Single byte encoding cases

    42     ];

    44     bad.forEach(

    45       function(t) {

    46         assert_throws({name: 'EncodingError'}, function () {

    47           new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input));

    48         });

    49       });

    50   },

    51   "fatal flag"

    52 );

    54 test(

    55   function() {

    56     var encodings = [

    57       { label: 'utf-8', encoding: 'utf-8' },

    58       { label: 'utf-16', encoding: 'utf-16le' },

    59       { label: 'utf-16le', encoding: 'utf-16le' },

    60       { label: 'utf-16be', encoding: 'utf-16be' },

    61       { label: 'ascii', encoding: 'windows-1252' },

    62       { label: 'iso-8859-1', encoding: 'windows-1252' }

    63     ];

    65     encodings.forEach(

    66       function(test) {

    67         assert_equals(new TextDecoder(test.label.toLowerCase()).encoding, test.encoding);

    68         assert_equals(new TextDecoder(test.label.toUpperCase()).encoding, test.encoding);

    69       });

    70   },

    71   "Encoding names are case insensitive"

    72 );

    74 test(

    75   function() {

    76     var utf8_bom = [0xEF, 0xBB, 0xBF];

    77     var utf8 = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD];

    79     var utf16le_bom = [0xff, 0xfe];

    80     var utf16le = [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF];

    82     var utf16be_bom = [0xfe, 0xff];

    83     var utf16be = [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD];

    85     var string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character

    87     // missing BOMs

    88     assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8)), string);

    89     assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le)), string);

    90     assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be)), string);

    92     // matching BOMs

    93     assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8_bom.concat(utf8))), string);

    94     assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le_bom.concat(utf16le))), string)

    95     assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be_bom.concat(utf16be))), string);

    97     // matching BOMs split

    98     var decoder8 = new TextDecoder('utf-8');

    99     assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 1)), {stream: true}), '');

   100     assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(1).concat(utf8))), string);

   101     assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 2)), {stream: true}), '');

   102     assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(2).concat(utf8))), string);

   103     var decoder16le = new TextDecoder('utf-16le');

   104     assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(0, 1)), {stream: true}), '');

   105     assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(1).concat(utf16le))), string);

   106     var decoder16be = new TextDecoder('utf-16be');

   107     assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(0, 1)), {stream: true}), '');

   108     assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(1).concat(utf16be))), string);

   110     // mismatching BOMs

   111     assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16le_bom.concat(utf8))), string);

   112     assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16be_bom.concat(utf8))), string);

   113     assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf8_bom.concat(utf16le))), string);

   114     assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16be_bom.concat(utf16le))), string);

   115     assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf8_bom.concat(utf16be))), string);

   116     assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16le_bom.concat(utf16be))), string);

   117   },

   118   "Byte-order marks"

   119 );

   121 test(

   122   function () {

   123     assert_equals(new TextDecoder("utf-8").encoding, "utf-8"); // canonical case

   124     assert_equals(new TextDecoder("UTF-16").encoding, "utf-16le"); // canonical case and name

   125     assert_equals(new TextDecoder("UTF-16BE").encoding, "utf-16be"); // canonical case and name

   126     assert_equals(new TextDecoder("iso8859-1").encoding, "windows-1252"); // canonical case and name

   127     assert_equals(new TextDecoder("iso-8859-1").encoding, "windows-1252"); // canonical case and name

   128   },

   129   "Encoding names"

   130 );

   132 test(

   133   function () {

   134     ["utf-8", "utf-16le", "utf-16be"].forEach(function (encoding) {

   135       var string = "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";

   136       var encoded = new TextEncoder(encoding).encode(string);

   138       for (var len = 1; len <= 5; ++len) {

   139         var out = "", decoder = new TextDecoder(encoding);

   140         for (var i = 0; i < encoded.length; i += len) {

   141           var sub = [];

   142           for (var j = i; j < encoded.length && j < i + len; ++j) {

   143             sub.push(encoded[j]);

   144           }

   145           out += decoder.decode(new Uint8Array(sub), {stream: true});

   146         }

   147         out += decoder.decode();

   148         assert_equals(out, string, "streaming decode " + encoding);

   149       }

   150     });

   151   },

   152   "Streaming Decode"

   153 );

   155 test(

   156   function () {

   157     var jis = [0x82, 0xC9, 0x82, 0xD9, 0x82, 0xF1];

   158     var expected = "\u306B\u307B\u3093"; // Nihon

   159     assert_equals(new TextDecoder("shift_jis").decode(new Uint8Array(jis)), expected);

   160   },

   161   "Shift_JIS Decode"

   162 );

   164 test(

   165   function () {

   166     var encodings = ["utf-8", "ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"];

   168     encodings.forEach(function (encoding) {

   169       var string = '', bytes = [];

   170       for (var i = 0; i < 128; ++i) {

   172         // Encodings that have escape codes in 0x00-0x7F

   173         if (encoding === "hz-gb-2312" && i === 0x7E)

   174           continue;

   175         if (encoding === "iso-2022-jp" && i === 0x1B)

   176           continue;

   178         string += String.fromCharCode(i);

   179         bytes.push(i);

   180       }

   181       var ascii_encoded = new TextEncoder('utf-8').encode(string);

   182       assert_equals(new TextDecoder(encoding).decode(ascii_encoded), string, encoding);

   183       //assert_array_equals(new TextEncoder(encoding).encode(string), bytes, encoding);

   184     });

   185   },

   186   "Supersets of ASCII decode ASCII correctly"

   187 );

   189 test(

   190   function () {

   191     assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-8", {fatal: true}).decode(new Uint8Array([0xff])); });

   192     // This should not hang:

   193     new TextDecoder("utf-8").decode(new Uint8Array([0xff]));

   195     assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-16", {fatal: true}).decode(new Uint8Array([0x00])); });

   196     // This should not hang:

   197     new TextDecoder("utf-16").decode(new Uint8Array([0x00]));

   199     assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-16be", {fatal: true}).decode(new Uint8Array([0x00])); });

   200     // This should not hang:

   201     new TextDecoder("utf-16be").decode(new Uint8Array([0x00]));

   202   },

   203   "Non-fatal errors at EOF"

   204 );

   206 test(

   207   function () {

   209     var utf_encodings = ["utf-8", "utf-16le", "utf-16be"];

   211     var legacy_encodings = ["ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"];

   213     utf_encodings.forEach(function(encoding) {

   214       assert_equals(new TextDecoder(encoding).encoding, encoding);

   215       assert_equals(new TextEncoder(encoding).encoding, encoding);

   216     });

   218     legacy_encodings.forEach(function(encoding) {

   219       assert_equals(new TextDecoder(encoding).encoding, encoding);

   220       assert_throws({name: 'TypeError'}, function() { new TextEncoder(encoding); });

   221     });

   222   },

   223   "Non-UTF encodings supported only for decode, not encode"

   224 );

The Tor Browser / file revision

dom/encoding/test/unit/test_misc.js@6474c204b198

dom/encoding/test/unit/test_misc.js