dom/encoding/test/test_BOMEncoding.js

Fri, 16 Jan 2015 18:13:44 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Fri, 16 Jan 2015 18:13:44 +0100
branch
TOR_BUG_9701
changeset 14
925c144e1f1f
permissions
-rw-r--r--

Integrate suggestion from review to improve consistency with existing code.

michael@0 1 /*
michael@0 2 * test_TextDecoderBOMEncoding.js
michael@0 3 * bug 764234 tests
michael@0 4 */
michael@0 5
michael@0 6 function runTextDecoderBOMEnoding()
michael@0 7 {
michael@0 8 test(testDecodeValidBOMUTF16, "testDecodeValidBOMUTF16");
michael@0 9 test(testBOMEncodingUTF8, "testBOMEncodingUTF8");
michael@0 10 test(testMoreBOMEncoding, "testMoreBOMEncoding");
michael@0 11 }
michael@0 12
michael@0 13 function testDecodeValidBOMUTF16() {
michael@0 14 var expectedString = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\"";
michael@0 15
michael@0 16 // Testing UTF-16BE
michael@0 17 var data = [0xFE, 0xFF, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22];
michael@0 18 testBOMCharset({encoding: "utf-16be", data: data, expected: expectedString,
michael@0 19 msg: "decoder valid UTF-16BE test."});
michael@0 20 }
michael@0 21
michael@0 22 function testBOMEncodingUTF8() {
michael@0 23
michael@0 24 // basic utf-8 test with valid encoding and byte stream. no byte om provided.
michael@0 25 var data = [0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27];
michael@0 26 var expectedString = " !\"#$%&'";
michael@0 27 testBOMCharset({encoding: "utf-8", data: data, expected: expectedString,
michael@0 28 msg: "utf-8 encoding."});
michael@0 29
michael@0 30 // test valid encoding provided with valid byte OM also provided.
michael@0 31 data = [0xEF, 0xBB, 0xBF, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27];
michael@0 32 expectedString = " !\"#$%&'";
michael@0 33 testBOMCharset({encoding: "utf-8", data: data, expected: expectedString,
michael@0 34 msg: "valid utf-8 encoding provided with VALID utf-8 BOM test."});
michael@0 35
michael@0 36 // test valid encoding provided with invalid byte OM also provided.
michael@0 37 data = [0xFF, 0xFE, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27];
michael@0 38 testBOMCharset({encoding: "utf-8", fatal: true, data: data, error: "EncodingError",
michael@0 39 msg: "valid utf-8 encoding provided with invalid utf-8 fatal BOM test."});
michael@0 40
michael@0 41 // test valid encoding provided with invalid byte OM also provided.
michael@0 42 data = [0xFF, 0xFE, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27];
michael@0 43 expectedString = "\ufffd\ufffd !\"#$%&'";
michael@0 44 testBOMCharset({encoding: "utf-8", data: data, expected: expectedString,
michael@0 45 msg: "valid utf-8 encoding provided with invalid utf-8 BOM test."});
michael@0 46
michael@0 47 // test empty encoding provided with invalid byte OM also provided.
michael@0 48 data = [0xFF, 0xFE, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27];
michael@0 49 testBOMCharset({encoding: "", data: data, error: "TypeError",
michael@0 50 msg: "empty encoding provided with invalid utf-8 BOM test."});
michael@0 51 }
michael@0 52
michael@0 53 function testMoreBOMEncoding() {
michael@0 54 var expectedString = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\"";
michael@0 55
michael@0 56 // Testing user provided encoding is UTF-16BE & bom encoding is utf-16le
michael@0 57 var data = [0xFF, 0xFE, 0x00, 0x22, 0x04, 0x12, 0x04, 0x41, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x30, 0x00, 0x20, 0x04, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x00, 0x2C, 0x00, 0x20, 0x04, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x00, 0x20, 0x04, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x00, 0x20, 0x04, 0x3F, 0x04, 0x3E, 0x00, 0x2D, 0x04, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x00, 0x2E, 0x00, 0x22];
michael@0 58
michael@0 59 testBOMCharset({encoding: "utf-16be", fatal: true, data: data, expected: "\ufffe" + expectedString,
michael@0 60 msg: "test decoder invalid BOM encoding for utf-16be fatal."});
michael@0 61
michael@0 62 testBOMCharset({encoding: "utf-16be", data: data, expected: "\ufffe" + expectedString,
michael@0 63 msg: "test decoder invalid BOM encoding for utf-16be."});
michael@0 64
michael@0 65 // Testing user provided encoding is UTF-16LE & bom encoding is utf-16be
michael@0 66 var dataUTF16 = [0xFE, 0xFF, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00];
michael@0 67 testBOMCharset({encoding: "utf-16le", fatal: true, data: dataUTF16, expected: "\ufffe" + expectedString,
michael@0 68 msg: "test decoder invalid BOM encoding for utf-16le fatal."});
michael@0 69
michael@0 70 testBOMCharset({encoding: "utf-16le", data: dataUTF16, expected: "\ufffe" + expectedString,
michael@0 71 msg: "test decoder invalid BOM encoding for utf-16le."});
michael@0 72
michael@0 73 // Testing user provided encoding is UTF-16 & bom encoding is utf-16be
michael@0 74 testBOMCharset({encoding: "utf-16", fatal: true, data: dataUTF16, expected: "\ufffe" + expectedString,
michael@0 75 msg: "test decoder invalid BOM encoding for utf-16 fatal."});
michael@0 76
michael@0 77 testBOMCharset({encoding: "utf-16", data: dataUTF16, expected: "\ufffe" + expectedString,
michael@0 78 msg: "test decoder invalid BOM encoding for utf-16."});
michael@0 79
michael@0 80 // Testing user provided encoding is UTF-16 & bom encoding is utf-16le
michael@0 81 dataUTF16 = [0xFF, 0xFE, 0x22, 0x00, 0x12, 0x04, 0x41, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x4B, 0x04, 0x35, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x38, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x45, 0x04, 0x3E, 0x04, 0x36, 0x04, 0x38, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x30, 0x04, 0x20, 0x00, 0x34, 0x04, 0x40, 0x04, 0x43, 0x04, 0x33, 0x04, 0x30, 0x04, 0x2C, 0x00, 0x20, 0x00, 0x3A, 0x04, 0x30, 0x04, 0x36, 0x04, 0x34, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x41, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x4C, 0x04, 0x4F, 0x04, 0x20, 0x00, 0x3D, 0x04, 0x35, 0x04, 0x41, 0x04, 0x47, 0x04, 0x30, 0x04, 0x41, 0x04, 0x42, 0x04, 0x3B, 0x04, 0x38, 0x04, 0x32, 0x04, 0x30, 0x04, 0x20, 0x00, 0x3F, 0x04, 0x3E, 0x04, 0x2D, 0x00, 0x41, 0x04, 0x32, 0x04, 0x3E, 0x04, 0x35, 0x04, 0x3C, 0x04, 0x43, 0x04, 0x2E, 0x00, 0x22, 0x00];
michael@0 82 testBOMCharset({encoding: "utf-16", fatal: true, data: dataUTF16, expected: expectedString,
michael@0 83 msg: "test decoder BOM encoding for utf-16 fatal."});
michael@0 84
michael@0 85 testBOMCharset({encoding: "utf-16", data: dataUTF16, expected: expectedString,
michael@0 86 msg: "test decoder BOM encoding for utf-16."});
michael@0 87
michael@0 88 // Testing user provided encoding is UTF-8 & bom encoding is utf-16be
michael@0 89 data = [0xFE, 0xFF, 0x22, 0xd0, 0x92, 0xd1, 0x81, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd1, 0x8b, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd0, 0xb8, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0xd1, 0x85, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb8, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1, 0x83, 0xd0, 0xb3, 0x20, 0xd0, 0xbd, 0xd0, 0xb0, 0x20, 0xd0, 0xb4, 0xd1, 0x80, 0xd1, 0x83, 0xd0, 0xb3, 0xd0, 0xb0, 0x2c, 0x20, 0xd0, 0xba, 0xd0, 0xb0, 0xd0, 0xb6, 0xd0, 0xb4, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0xd1, 0x8f, 0x20, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x8c, 0xd1, 0x8f, 0x20, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x87, 0xd0, 0xb0, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbb, 0xd0, 0xb8, 0xd0, 0xb2, 0xd0, 0xb0, 0x20, 0xd0, 0xbf, 0xd0, 0xbe, 0x2d, 0xd1, 0x81, 0xd0, 0xb2, 0xd0, 0xbe, 0xd0, 0xb5, 0xd0, 0xbc, 0xd1, 0x83, 0x2e, 0x22];
michael@0 90
michael@0 91 testBOMCharset({encoding: "utf-8", fatal: true, data: data, error: "EncodingError",
michael@0 92 msg: "test decoder invalid BOM encoding for valid utf-8 fatal provided label."});
michael@0 93
michael@0 94 testBOMCharset({encoding: "utf-8", data: data, expected: "\ufffd\ufffd" + expectedString,
michael@0 95 msg: "test decoder invalid BOM encoding for valid utf-8 provided label."});
michael@0 96
michael@0 97 // Testing user provided encoding is non-UTF & bom encoding is utf-16be
michael@0 98 data = [0xFE, 0xFF, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe];
michael@0 99
michael@0 100 expectedString = "\u03CE\uFFFD\u2019\xA3\u20AC\u20AF\xA6\xA7\xA8\xA9\u037A\xAB\xAC\xAD\u2015"
michael@0 101 + "\xB0\xB1\xB2\xB3\u0384\u0385\u0386\xB7\u0388\u0389\u038A\xBB\u038C\xBD\u038E\u038F"
michael@0 102 + "\u0390\u0391\u0392\u0393\u0394\u0395\u0396\u0397\u0398\u0399\u039A\u039B\u039C\u039D\u039E\u039F"
michael@0 103 + "\u03A0\u03A1\u03A3\u03A4\u03A5\u03A6\u03A7\u03A8\u03A9\u03AA\u03AB\u03AC\u03AD\u03AE\u03AF"
michael@0 104 + "\u03B0\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB\u03BC\u03BD\u03BE\u03BF"
michael@0 105 + "\u03C0\u03C1\u03C2\u03C3\u03C4\u03C5\u03C6\u03C7\u03C8\u03C9\u03CA\u03CB\u03CC\u03CD\u03CE";
michael@0 106
michael@0 107 testBOMCharset({encoding: "greek", fatal: true, data: data, error: "EncodingError",
michael@0 108 msg: "test decoder encoding provided with invalid BOM encoding for greek."});
michael@0 109
michael@0 110 testBOMCharset({encoding: "greek", data: data, expected: expectedString,
michael@0 111 msg: "test decoder encoding provided with invalid BOM encoding for greek."});
michael@0 112 }
michael@0 113
michael@0 114 function testBOMCharset(test)
michael@0 115 {
michael@0 116 var outText;
michael@0 117 try {
michael@0 118 var decoder = 'fatal' in test ?
michael@0 119 new TextDecoder(test.encoding, {fatal: test.fatal}) :
michael@0 120 new TextDecoder(test.encoding);
michael@0 121 outText = decoder.decode(new Uint8Array(test.data));
michael@0 122 } catch (e) {
michael@0 123 assert_equals(e.name, test.error, test.msg);
michael@0 124 return;
michael@0 125 }
michael@0 126 assert_true(!test.error, test.msg);
michael@0 127
michael@0 128 if (outText !== test.expected) {
michael@0 129 assert_equals(escape(outText), escape(test.expected), test.msg + " Code points do not match expected code points.");
michael@0 130 }
michael@0 131 }
michael@0 132

mercurial