// NOTE: Requires testharness.js
// http://www.w3.org/2008/webapps/wiki/Harness
//
// Miscellaneous TextEncoder / TextDecoder tests. Each test(fn, name) call
// below registers one independent test with the harness; the assert_*
// helpers are the harness's and take (actual, expected, description).

// Lone or mis-ordered surrogates in the input string must come back as
// U+FFFD after a UTF-8 encode/decode round trip.
test(
  function() {
    var badStrings = [
      { input: '\ud800', expected: '\ufffd' }, // Surrogate half
      { input: '\udc00', expected: '\ufffd' }, // Surrogate half
      { input: 'abc\ud800def', expected: 'abc\ufffddef' }, // Surrogate half
      { input: 'abc\udc00def', expected: 'abc\ufffddef' }, // Surrogate half
      { input: '\udc00\ud800', expected: '\ufffd\ufffd' } // Wrong order
    ];

    badStrings.forEach(
      function(t) {
        var encoded = new TextEncoder('utf-8').encode(t.input);
        var decoded = new TextDecoder('utf-8').decode(encoded);
        // Actual value first, expected second, so that failure messages
        // report the two values the right way round.
        assert_equals(decoded, t.expected);
      });
  },
  "bad data"
);

// With {fatal: true}, decoding each of these malformed byte sequences is
// expected to throw an EncodingError.
test(
  function() {
    var bad = [
      { encoding: 'utf-8', input: [0xC0] }, // ends early
      { encoding: 'utf-8', input: [0xC0, 0x00] }, // invalid trail
      { encoding: 'utf-8', input: [0xC0, 0xC0] }, // invalid trail
      { encoding: 'utf-8', input: [0xE0] }, // ends early
      { encoding: 'utf-8', input: [0xE0, 0x00] }, // invalid trail
      { encoding: 'utf-8', input: [0xE0, 0xC0] }, // invalid trail
      { encoding: 'utf-8', input: [0xE0, 0x80, 0x00] }, // invalid trail
      { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0] }, // invalid trail
      { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80] }, // > 0x10FFFF
      { encoding: 'utf-16le', input: [0x00] }, // truncated code unit
      { encoding: 'utf-16le', input: [0x00, 0xd8] }, // surrogate half
      { encoding: 'utf-16le', input: [0x00, 0xd8, 0x00, 0x00] }, // surrogate half
      { encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0x00] }, // trail surrogate
      { encoding: 'utf-16le', input: [0x00, 0xdc, 0x00, 0xd8] } // swapped surrogates
      // TODO: Single byte encoding cases
    ];

    bad.forEach(
      function(t) {
        assert_throws({name: 'EncodingError'}, function () {
          new TextDecoder(t.encoding, {fatal: true}).decode(new Uint8Array(t.input));
        }, t.encoding + ' ' + JSON.stringify(t.input));
      });
  },
  "fatal flag"
);

// A label must resolve to the same canonical encoding name whether it is
// given in lower case or in upper case.
test(
  function() {
    var encodings = [
      { label: 'utf-8', encoding: 'utf-8' },
      { label: 'utf-16', encoding: 'utf-16le' },
      { label: 'utf-16le', encoding: 'utf-16le' },
      { label: 'utf-16be', encoding: 'utf-16be' },
      { label: 'ascii', encoding: 'windows-1252' },
      { label: 'iso-8859-1', encoding: 'windows-1252' }
    ];

    encodings.forEach(
      // The parameter is deliberately not called "test", which would shadow
      // the harness's test() function inside the callback.
      function(t) {
        assert_equals(new TextDecoder(t.label.toLowerCase()).encoding, t.encoding);
        assert_equals(new TextDecoder(t.label.toUpperCase()).encoding, t.encoding);
      });
  },
  "Encoding names are case insensitive"
);

// Byte-order-mark handling: the same text is decoded without a BOM, with the
// matching BOM (in one piece and split across streamed chunks), and with a
// BOM belonging to a different encoding.
test(
  function() {
    var utf8_bom = [0xEF, 0xBB, 0xBF];
    var utf8 = [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD];

    var utf16le_bom = [0xff, 0xfe];
    var utf16le = [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF];

    var utf16be_bom = [0xfe, 0xff];
    var utf16be = [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD];

    var string = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD"; // z, cent, CJK water, G-Clef, Private-use character

    // missing BOMs
    assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8)), string);
    assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le)), string);
    assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be)), string);

    // matching BOMs
    assert_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf8_bom.concat(utf8))), string);
    assert_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16le_bom.concat(utf16le))), string);
    assert_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16be_bom.concat(utf16be))), string);

    // matching BOMs split
    // The first chunk is passed with {stream: true} and is expected to yield
    // '' while the BOM is still incomplete; the second call (no stream flag)
    // finishes the input. decoder8 is then reused for a second split point.
    var decoder8 = new TextDecoder('utf-8');
    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 1)), {stream: true}), '');
    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(1).concat(utf8))), string);
    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(0, 2)), {stream: true}), '');
    assert_equals(decoder8.decode(new Uint8Array(utf8_bom.slice(2).concat(utf8))), string);
    var decoder16le = new TextDecoder('utf-16le');
    assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(0, 1)), {stream: true}), '');
    assert_equals(decoder16le.decode(new Uint8Array(utf16le_bom.slice(1).concat(utf16le))), string);
    var decoder16be = new TextDecoder('utf-16be');
    assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(0, 1)), {stream: true}), '');
    assert_equals(decoder16be.decode(new Uint8Array(utf16be_bom.slice(1).concat(utf16be))), string);

    // mismatching BOMs
    assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16le_bom.concat(utf8))), string);
    assert_not_equals(new TextDecoder('utf-8').decode(new Uint8Array(utf16be_bom.concat(utf8))), string);
    assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf8_bom.concat(utf16le))), string);
    assert_not_equals(new TextDecoder('utf-16le').decode(new Uint8Array(utf16be_bom.concat(utf16le))), string);
    assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf8_bom.concat(utf16be))), string);
    assert_not_equals(new TextDecoder('utf-16be').decode(new Uint8Array(utf16le_bom.concat(utf16be))), string);
  },
  "Byte-order marks"
);

// The .encoding attribute reports the canonical lower-case name, whatever
// label was passed to the constructor.
test(
  function () {
    assert_equals(new TextDecoder("utf-8").encoding, "utf-8"); // canonical case
    assert_equals(new TextDecoder("UTF-16").encoding, "utf-16le"); // canonical case and name
    assert_equals(new TextDecoder("UTF-16BE").encoding, "utf-16be"); // canonical case and name
    assert_equals(new TextDecoder("iso8859-1").encoding, "windows-1252"); // canonical case and name
    assert_equals(new TextDecoder("iso-8859-1").encoding, "windows-1252"); // canonical case and name
  },
  "Encoding names"
);

// Feeding the encoded bytes to a streaming decoder in chunks of 1 to 5 bytes
// must reproduce the original string for each UTF encoding.
test(
  function () {
    // The sample text is the same for every encoding, so build it once.
    var string = "\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF";

    ["utf-8", "utf-16le", "utf-16be"].forEach(function (encoding) {
      var encoded = new TextEncoder(encoding).encode(string);

      for (var len = 1; len <= 5; ++len) {
        var out = "", decoder = new TextDecoder(encoding);
        for (var i = 0; i < encoded.length; i += len) {
          var sub = [];
          for (var j = i; j < encoded.length && j < i + len; ++j) {
            sub.push(encoded[j]);
          }
          out += decoder.decode(new Uint8Array(sub), {stream: true});
        }
        // Final call with no input and no stream flag ends the stream.
        out += decoder.decode();
        assert_equals(out, string, "streaming decode " + encoding);
      }
    });
  },
  "Streaming Decode"
);

test(
  function () {
    var jis = [0x82, 0xC9, 0x82, 0xD9, 0x82, 0xF1];
    var expected = "\u306B\u307B\u3093"; // Nihon
    assert_equals(new TextDecoder("shift_jis").decode(new Uint8Array(jis)), expected);
  },
  "Shift_JIS Decode"
);

// Every listed encoding must decode the bytes 0x00-0x7F to the identical
// ASCII characters (minus the per-encoding escape bytes skipped below).
test(
  function () {
    var encodings = [
      "utf-8", "ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4",
      "iso-8859-5", "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i",
      "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15",
      "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874",
      "windows-1250", "windows-1251", "windows-1252", "windows-1253",
      "windows-1254", "windows-1255", "windows-1256", "windows-1257",
      "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312",
      "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"
    ];

    encodings.forEach(function (encoding) {
      var string = '';
      for (var i = 0; i < 128; ++i) {

        // Encodings that have escape codes in 0x00-0x7F
        if (encoding === "hz-gb-2312" && i === 0x7E) {
          continue;
        }
        if (encoding === "iso-2022-jp" && i === 0x1B) {
          continue;
        }

        string += String.fromCharCode(i);
      }
      // Only the decode direction is checked here: the last test in this
      // file expects TextEncoder to reject the legacy encodings, so the
      // input bytes are always produced with the UTF-8 encoder.
      var ascii_encoded = new TextEncoder('utf-8').encode(string);
      assert_equals(new TextDecoder(encoding).decode(ascii_encoded), string, encoding);
    });
  },
  "Supersets of ASCII decode ASCII correctly"
);

// Truncated input at end-of-stream: the fatal decoder must throw, and the
// non-fatal decoder must return rather than loop forever.
test(
  function () {
    assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-8", {fatal: true}).decode(new Uint8Array([0xff])); });
    // This should not hang:
    new TextDecoder("utf-8").decode(new Uint8Array([0xff]));

    assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-16", {fatal: true}).decode(new Uint8Array([0x00])); });
    // This should not hang:
    new TextDecoder("utf-16").decode(new Uint8Array([0x00]));

    assert_throws({name: 'EncodingError'}, function() { new TextDecoder("utf-16be", {fatal: true}).decode(new Uint8Array([0x00])); });
    // This should not hang:
    new TextDecoder("utf-16be").decode(new Uint8Array([0x00]));
  },
  "Non-fatal errors at EOF"
);

// UTF encodings are accepted by both TextDecoder and TextEncoder; the legacy
// encodings are accepted by TextDecoder only, and constructing a TextEncoder
// for one of them must throw a TypeError.
test(
  function () {

    var utf_encodings = ["utf-8", "utf-16le", "utf-16be"];

    var legacy_encodings = [
      "ibm866", "iso-8859-2", "iso-8859-3", "iso-8859-4", "iso-8859-5",
      "iso-8859-6", "iso-8859-7", "iso-8859-8", "iso-8859-8-i",
      "iso-8859-10", "iso-8859-13", "iso-8859-14", "iso-8859-15",
      "iso-8859-16", "koi8-r", "koi8-u", "macintosh", "windows-874",
      "windows-1250", "windows-1251", "windows-1252", "windows-1253",
      "windows-1254", "windows-1255", "windows-1256", "windows-1257",
      "windows-1258", "x-mac-cyrillic", "gbk", "gb18030", "hz-gb-2312",
      "big5", "euc-jp", "iso-2022-jp", "shift_jis", "euc-kr", "x-user-defined"
    ];

    utf_encodings.forEach(function(encoding) {
      assert_equals(new TextDecoder(encoding).encoding, encoding);
      assert_equals(new TextEncoder(encoding).encoding, encoding);
    });

    legacy_encodings.forEach(function(encoding) {
      assert_equals(new TextDecoder(encoding).encoding, encoding);
      assert_throws({name: 'TypeError'}, function() { new TextEncoder(encoding); });
    });
  },
  "Non-UTF encodings supported only for decode, not encode"
);