dom/encoding/test/unit/test_misc.js

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/dom/encoding/test/unit/test_misc.js	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,224 @@
     1.4 +// NOTE: Requires testharness.js
     1.5 +// http://www.w3.org/2008/webapps/wiki/Harness
     1.6 +
     1.7 +test(
     1.8 +  function() {
     1.9 +    var badStrings = [
    1.10 +      { input: '\ud800', expected: '\ufffd' }, // Surrogate half
    1.11 +      { input: '\udc00', expected: '\ufffd' }, // Surrogate half
    1.12 +      { input: 'abc\ud800def', expected: 'abc\ufffddef' }, // Surrogate half
    1.13 +      { input: 'abc\udc00def', expected: 'abc\ufffddef' }, // Surrogate half
    1.14 +      { input: '\udc00\ud800', expected: '\ufffd\ufffd' } // Wrong order
    1.15 +    ];
    1.16 +
    1.17 +    badStrings.forEach(
    1.18 +      function(t) {
    1.19 +        var encoded = new TextEncoder('utf-8').encode(t.input);
    1.20 +        var decoded = new TextDecoder('utf-8').decode(encoded);
    1.21 +        assert_equals(t.expected, decoded);
    1.22 +      });
    1.23 +  },
    1.24 +  "bad data"
    1.25 +);
    1.26 +
    1.27 +test( // Every malformed input below must make a {fatal: true} decoder throw EncodingError.
    1.28 +  function() {
    1.29 +    var bad = [
    1.30 +      { encoding: 'utf-8', input: [0xC0] }, // ends early
    1.31 +      { encoding: 'utf-8', input: [0xC0, 0x00] }, // invalid trail
    1.32 +      { encoding: 'utf-8', input: [0xC0, 0xC0] }, // invalid trail
    1.33 +      { encoding: 'utf-8', input: [0xE0] }, // ends early
    1.34 +      { encoding: 'utf-8', input: [0xE0, 0x00] }, // invalid trail
    1.35 +      { encoding: 'utf-8', input: [0xE0, 0xC0] }, // invalid trail
    1.36 +      { encoding: 'utf-8', input: [0xE0, 0x80, 0x00] }, // invalid trail
    1.37 +      { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0] }, // invalid trail
    1.38 +      { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF
    1.39 +      { encoding: 'utf-16le', input: [0x00] }, // truncated code unit
    1.40 +      { encoding: 'utf-16le', input: [0x00, 0xd8] }, // lone lead surrogate (0xD800) at end of input
    1.41 +      { encoding: 'utf-16le', input: [0x00, 0xd8, 0x00, 0x00] }, // lead surrogate followed by a non-surrogate (0x0000)
    1.42 +      { encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0x00] }, // trail surrogate (0xDC00) with no lead before it
    1.43 +      { encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0xd8] }  // swapped surrogates
    1.44 +      // TODO: Single byte encoding cases
    1.45 +    ];
    1.46 +
    1.47 +    bad.forEach(
    1.48 +      function(t) {
    1.49 +        assert_throws({name: 'EncodingError'}, function () { // a new decoder is built for every case
    1.50 +          new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input));
    1.51 +        });
    1.52 +      });
    1.53 +  },
    1.54 +  "fatal flag"
    1.55 +);
    1.56 +
    1.57 +test( // Each label must resolve to the same encoding name whether it is lower- or upper-cased.
    1.58 +  function() {
    1.59 +    var encodings = [ // label: string passed to the constructor; encoding: expected .encoding value
    1.60 +      { label: 'utf-8', encoding: 'utf-8' },
    1.61 +      { label: 'utf-16', encoding: 'utf-16le' },
    1.62 +      { label: 'utf-16le', encoding: 'utf-16le' },
    1.63 +      { label: 'utf-16be', encoding: 'utf-16be' },
    1.64 +      { label: 'ascii', encoding: 'windows-1252' },
    1.65 +      { label: 'iso-8859-1', encoding: 'windows-1252' }
    1.66 +    ];
    1.67 +
    1.68 +    encodings.forEach(
    1.69 +      function(test) { // NOTE: this parameter shadows the global test() inside the callback
    1.70 +        assert_equals(new TextDecoder(test.label.toLowerCase()).encoding, test.encoding);
    1.71 +        assert_equals(new TextDecoder(test.label.toUpperCase()).encoding, test.encoding);
    1.72 +      });
    1.73 +  },
    1.74 +  "Encoding names are case insensitive"
    1.75 +);
    1.76 +
    1.77 +test(
    1.78 +  function() {
    1.79 +    var utf8_bom = [0xEF, 0xBB, 0xBF];
    1.80 +    var utf8 = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD];
    1.81 +
    1.82 +    var utf16le_bom = [0xff, 0xfe];
    1.83 +    var utf16le = [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF];
    1.84 +
    1.85 +    var utf16be_bom = [0xfe, 0xff];
    1.86 +    var utf16be = [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD];
    1.87 +
    1.88 +    var string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character
    1.89 +
    1.90 +    // missing BOMs
    1.91 +    assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8)), string);
    1.92 +    assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le)), string);
    1.93 +    assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be)), string);
    1.94 +
    1.95 +    // matching BOMs
    1.96 +    assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8_bom.concat(utf8))), string);
    1.97 +    assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le_bom.concat(utf16le))), string)
    1.98 +    assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be_bom.concat(utf16be))), string);
    1.99 +
   1.100 +    // matching BOMs split
   1.101 +    var decoder8 = new TextDecoder('utf-8');
   1.102 +    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 1)), {stream: true}), '');
   1.103 +    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(1).concat(utf8))), string);
   1.104 +    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 2)), {stream: true}), '');
   1.105 +    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(2).concat(utf8))), string);
   1.106 +    var decoder16le = new TextDecoder('utf-16le');
   1.107 +    assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(0, 1)), {stream: true}), '');
   1.108 +    assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(1).concat(utf16le))), string);
   1.109 +    var decoder16be = new TextDecoder('utf-16be');
   1.110 +    assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(0, 1)), {stream: true}), '');
   1.111 +    assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(1).concat(utf16be))), string);
   1.112 +
   1.113 +    // mismatching BOMs
   1.114 +    assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16le_bom.concat(utf8))), string);
   1.115 +    assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16be_bom.concat(utf8))), string);
   1.116 +    assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf8_bom.concat(utf16le))), string);
   1.117 +    assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16be_bom.concat(utf16le))), string);
   1.118 +    assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf8_bom.concat(utf16be))), string);
   1.119 +    assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16le_bom.concat(utf16be))), string);
   1.120 +  },
   1.121 +  "Byte-order marks"
   1.122 +);
   1.123 +
   1.124 +test( // The .encoding attribute must report the lower-case canonical name for the label given to the constructor.
   1.125 +  function () {
   1.126 +    assert_equals(new TextDecoder("utf-8").encoding, "utf-8"); // canonical case
   1.127 +    assert_equals(new TextDecoder("UTF-16").encoding, "utf-16le"); // canonical case and name
   1.128 +    assert_equals(new TextDecoder("UTF-16BE").encoding, "utf-16be"); // canonical case and name
   1.129 +    assert_equals(new TextDecoder("iso8859-1").encoding, "windows-1252"); // canonical case and name
   1.130 +    assert_equals(new TextDecoder("iso-8859-1").encoding, "windows-1252"); // canonical case and name
   1.131 +  },
   1.132 +  "Encoding names"
   1.133 +);
   1.134 +
   1.135 +test( // Decoding the encoded bytes in chunks of 1 to 5 bytes must reproduce the original string.
   1.136 +  function () {
   1.137 +    ["utf-8", "utf-16le", "utf-16be"].forEach(function (encoding) {
   1.138 +      var string = "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";
   1.139 +      var encoded = new TextEncoder(encoding).encode(string);
   1.140 +
   1.141 +      for (var len = 1; len <= 5; ++len) { // len is the chunk size in bytes
   1.142 +        var out = "", decoder = new TextDecoder(encoding); // fresh decoder and output for each chunk size
   1.143 +        for (var i = 0; i < encoded.length; i += len) {
   1.144 +          var sub = [];
   1.145 +          for (var j = i; j < encoded.length && j < i + len; ++j) { // copy at most len bytes; the last chunk may be shorter
   1.146 +            sub.push(encoded[j]);
   1.147 +          }
   1.148 +          out += decoder.decode(new Uint8Array(sub), {stream: true});
   1.149 +        }
   1.150 +        out += decoder.decode(); // final call with no input and no stream flag; its output is appended too
   1.151 +        assert_equals(out, string, "streaming decode " + encoding);
   1.152 +      }
   1.153 +    });
   1.154 +  },
   1.155 +  "Streaming Decode"
   1.156 +);
   1.157 +
   1.158 +test( // Three two-byte Shift_JIS sequences must decode to the three expected hiragana code points.
   1.159 +  function () {
   1.160 +    var jis = [0x82, 0xC9, 0x82, 0xD9, 0x82, 0xF1];
   1.161 +    var expected = "\u306B\u307B\u3093"; // Nihon
   1.162 +    assert_equals(new TextDecoder("shift_jis").decode(new Uint8Array(jis)), expected);
   1.163 +  },
   1.164 +  "Shift_JIS Decode"
   1.165 +);
   1.166 +
   1.167 +test( // Every listed encoding must decode the bytes 0x00-0x7F (minus known escape bytes) to the same ASCII characters.
   1.168 +  function () {
   1.169 +    var encodings = ["utf-8", "ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"];
   1.170 +
   1.171 +    encodings.forEach(function (encoding) {
   1.172 +      var string = '', bytes = []; // bytes is only referenced by the disabled assertion at the end of this callback
   1.173 +      for (var i = 0; i < 128; ++i) {
   1.174 +
   1.175 +        // Encodings that have escape codes in 0x00-0x7F
   1.176 +        if (encoding === "hz-gb-2312" && i === 0x7E)
   1.177 +          continue;
   1.178 +        if (encoding === "iso-2022-jp" && i === 0x1B)
   1.179 +          continue;
   1.180 +
   1.181 +        string += String.fromCharCode(i);
   1.182 +        bytes.push(i);
   1.183 +      }
   1.184 +      var ascii_encoded = new TextEncoder('utf-8').encode(string); // input bytes are produced by the UTF-8 encoder, not taken from bytes
   1.185 +      assert_equals(new TextDecoder(encoding).decode(ascii_encoded), string, encoding);
   1.186 +      //assert_array_equals(new TextEncoder(encoding).encode(string), bytes, encoding);
   1.187 +    });
   1.188 +  },
   1.189 +  "Supersets of ASCII decode ASCII correctly"
   1.190 +);
   1.191 +
   1.192 +test( // Truncated input must throw in fatal mode, and the same input must decode without hanging in non-fatal mode.
   1.193 +  function () {
   1.194 +    assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-8", {fatal: true}).decode(new Uint8Array([0xff])); });
   1.195 +    // This should not hang:
   1.196 +    new TextDecoder("utf-8").decode(new Uint8Array([0xff]));
   1.197 +
   1.198 +    assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-16", {fatal: true}).decode(new Uint8Array([0x00])); }); // a single byte is half a UTF-16 code unit
   1.199 +    // This should not hang:
   1.200 +    new TextDecoder("utf-16").decode(new Uint8Array([0x00]));
   1.201 +
   1.202 +    assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-16be", {fatal: true}).decode(new Uint8Array([0x00])); });
   1.203 +    // This should not hang:
   1.204 +    new TextDecoder("utf-16be").decode(new Uint8Array([0x00]));
   1.205 +  },
   1.206 +  "Non-fatal errors at EOF"
   1.207 +);
   1.208 +
   1.209 +test( // UTF encodings must work for both TextDecoder and TextEncoder; legacy encodings only for TextDecoder.
   1.210 +  function () {
   1.211 +
   1.212 +    var utf_encodings = ["utf-8", "utf-16le", "utf-16be"];
   1.213 +
   1.214 +    var legacy_encodings = ["ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i", "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15", "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874", "windows-1250", "windows-1251", "windows-1252", "windows-1253", "windows-1254", "windows-1255", "windows-1256", "windows-1257", "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312", "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"];
   1.215 +
   1.216 +    utf_encodings.forEach(function(encoding) {
   1.217 +      assert_equals(new TextDecoder(encoding).encoding, encoding);
   1.218 +      assert_equals(new TextEncoder(encoding).encoding, encoding);
   1.219 +    });
   1.220 +
   1.221 +    legacy_encodings.forEach(function(encoding) {
   1.222 +      assert_equals(new TextDecoder(encoding).encoding, encoding);
   1.223 +      assert_throws({name: 'TypeError'}, function() { new TextEncoder(encoding); }); // constructing an encoder for a legacy encoding must throw
   1.224 +    });
   1.225 +  },
   1.226 +  "Non-UTF encodings supported only for decode, not encode"
   1.227 +);

mercurial