1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/dom/encoding/test/test_BOMEncoding.js Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,132 @@ 1.4 +/* 1.5 + * test_TextDecoderBOMEncoding.js 1.6 + * bug 764234 tests 1.7 +*/ 1.8 + 1.9 +function runTextDecoderBOMEnoding() 1.10 +{ 1.11 + test(testDecodeValidBOMUTF16, "testDecodeValidBOMUTF16"); 1.12 + test(testBOMEncodingUTF8, "testBOMEncodingUTF8"); 1.13 + test(testMoreBOMEncoding, "testMoreBOMEncoding"); 1.14 +} 1.15 + 1.16 +function testDecodeValidBOMUTF16() { 1.17 + var expectedString = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\""; 1.18 + 1.19 + // Testing UTF-16BE 1.20 + var data = [0xFE, 0xFF, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22]; 1.21 + testBOMCharset({encoding: "utf-16be", data: data, expected: expectedString, 1.22 + msg: "decoder valid UTF-16BE test."}); 1.23 +} 1.24 + 1.25 +function testBOMEncodingUTF8() { 1.26 + 1.27 + // basic utf-8 test with valid encoding and byte stream. no byte om provided. 1.28 + var data = [0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27]; 1.29 + var expectedString = " !\"#$%&'"; 1.30 + testBOMCharset({encoding: "utf-8", data: data, expected: expectedString, 1.31 + msg: "utf-8 encoding."}); 1.32 + 1.33 + // test valid encoding provided with valid byte OM also provided. 1.34 + data = [0xEF, 0xBB, 0xBF, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27]; 1.35 + expectedString = " !\"#$%&'"; 1.36 + testBOMCharset({encoding: "utf-8", data: data, expected: expectedString, 1.37 + msg: "valid utf-8 encoding provided with VALID utf-8 BOM test."}); 1.38 + 1.39 + // test valid encoding provided with invalid byte OM also provided. 1.40 + data = [0xFF, 0xFE, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27]; 1.41 + testBOMCharset({encoding: "utf-8", fatal: true, data: data, error: "EncodingError", 1.42 + msg: "valid utf-8 encoding provided with invalid utf-8 fatal BOM test."}); 1.43 + 1.44 + // test valid encoding provided with invalid byte OM also provided. 1.45 + data = [0xFF, 0xFE, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27]; 1.46 + expectedString = "\ufffd\ufffd !\"#$%&'"; 1.47 + testBOMCharset({encoding: "utf-8", data: data, expected: expectedString, 1.48 + msg: "valid utf-8 encoding provided with invalid utf-8 BOM test."}); 1.49 + 1.50 + // test empty encoding provided with invalid byte OM also provided. 1.51 + data = [0xFF, 0xFE, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27]; 1.52 + testBOMCharset({encoding: "", data: data, error: "TypeError", 1.53 + msg: "empty encoding provided with invalid utf-8 BOM test."}); 1.54 +} 1.55 + 1.56 +function testMoreBOMEncoding() { 1.57 + var expectedString = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\""; 1.58 + 1.59 + // Testing user provided encoding is UTF-16BE & bom encoding is utf-16le 1.60 + var data = [0xFF, 0xFE, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22]; 1.61 + 1.62 + testBOMCharset({encoding: "utf-16be", fatal: true, data: data, expected: "\ufffe" + expectedString, 1.63 + msg: "test decoder invalid BOM encoding for utf-16be fatal."}); 1.64 + 1.65 + testBOMCharset({encoding: "utf-16be", data: data, expected: "\ufffe" + expectedString, 1.66 + msg: "test decoder invalid BOM encoding for utf-16be."}); 1.67 + 1.68 + // Testing user provided encoding is UTF-16LE & bom encoding is utf-16be 1.69 + var dataUTF16 = [0xFE, 0xFF, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00]; 1.70 + testBOMCharset({encoding: "utf-16le", fatal: true, data: dataUTF16, expected: "\ufffe" + expectedString, 1.71 + msg: "test decoder invalid BOM encoding for utf-16le fatal."}); 1.72 + 1.73 + testBOMCharset({encoding: "utf-16le", data: dataUTF16, expected: "\ufffe" + expectedString, 1.74 + msg: "test decoder invalid BOM encoding for utf-16le."}); 1.75 + 1.76 + // Testing user provided encoding is UTF-16 & bom encoding is utf-16be 1.77 + testBOMCharset({encoding: "utf-16", fatal: true, data: dataUTF16, expected: "\ufffe" + expectedString, 1.78 + msg: "test decoder invalid BOM encoding for utf-16 fatal."}); 1.79 + 1.80 + testBOMCharset({encoding: "utf-16", data: dataUTF16, expected: "\ufffe" + expectedString, 1.81 + msg: "test decoder invalid BOM encoding for utf-16."}); 1.82 + 1.83 + // Testing user provided encoding is UTF-16 & bom encoding is utf-16le 1.84 + dataUTF16 = [0xFF, 0xFE, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00]; 1.85 + testBOMCharset({encoding: "utf-16", fatal: true, data: dataUTF16, expected: expectedString, 1.86 + msg: "test decoder BOM encoding for utf-16 fatal."}); 1.87 + 1.88 + testBOMCharset({encoding: "utf-16", data: dataUTF16, expected: expectedString, 1.89 + msg: "test decoder BOM encoding for utf-16."}); 1.90 + 1.91 + // Testing user provided encoding is UTF-8 & bom encoding is utf-16be 1.92 + data = [0xFE, 0xFF, 0x22, 0xd0, 0x92, 0xd1, 0x81, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd1, 0x8b, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd0, 0xb8, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0xd1, 0x85, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb8, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1, 0x83, 0xd0, 0xb3, 0x20, 0xd0, 0xbd, 0xd0, 0xb0, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1, 0x83, 0xd0, 0xb3, 0xd0, 0xb0, 0x2c, 0x20, 0xd0, 0xba, 0xd0, 0xb0, 0xd0, 0xb6, 0xd0, 0xb4, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd1, 0x8f, 0x20, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0x2d, 0xd1, 0x81, 0xd0, 0xb2, 0xd0, 0xbe, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x83, 0x2e, 0x22]; 1.93 + 1.94 + testBOMCharset({encoding: "utf-8", fatal: true, data: data, error: "EncodingError", 1.95 + msg: "test decoder invalid BOM encoding for valid utf-8 fatal provided label."}); 1.96 + 1.97 + testBOMCharset({encoding: "utf-8", data: data, expected: "\ufffd\ufffd" + expectedString, 1.98 + msg: "test decoder invalid BOM encoding for valid utf-8 provided label."}); 1.99 + 1.100 + // Testing user provided encoding is non-UTF & bom encoding is utf-16be 1.101 + data = [0xFE, 0xFF, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe]; 1.102 + 1.103 + expectedString = "\u03CE\uFFFD\u2019\xA3\u20AC\u20AF\xA6\xA7\xA8\xA9\u037A\xAB\xAC\xAD\u2015" 1.104 + + "\xB0\xB1\xB2\xB3\u0384\u0385\u0386\xB7\u0388\u0389\u038A\xBB\u038C\xBD\u038E\u038F" 1.105 + + "\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F" 1.106 + + "\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03AA\u03AB\u03AC\u03AD\u03AE\u03AF" 1.107 + + "\u03B0\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF" 1.108 + + "\u03C0\u03C1\u03C2\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u03CA\u03CB\u03CC\u03CD\u03CE"; 1.109 + 1.110 + testBOMCharset({encoding: "greek", fatal: true, data: data, error: "EncodingError", 1.111 + msg: "test decoder encoding provided with invalid BOM encoding for greek."}); 1.112 + 1.113 + testBOMCharset({encoding: "greek", data: data, expected: expectedString, 1.114 + msg: "test decoder encoding provided with invalid BOM encoding for greek."}); 1.115 +} 1.116 + 1.117 +function testBOMCharset(test) 1.118 +{ 1.119 + var outText; 1.120 + try { 1.121 + var decoder = 'fatal' in test ? 1.122 + new TextDecoder(test.encoding, {fatal: test.fatal}) : 1.123 + new TextDecoder(test.encoding); 1.124 + outText = decoder.decode(new Uint8Array(test.data)); 1.125 + } catch (e) { 1.126 + assert_equals(e.name, test.error, test.msg); 1.127 + return; 1.128 + } 1.129 + assert_true(!test.error, test.msg); 1.130 + 1.131 + if (outText !== test.expected) { 1.132 + assert_equals(escape(outText), escape(test.expected), test.msg + " Code points do not match expected code points."); 1.133 + } 1.134 +} 1.135 +