The Tor Browser: dom/encoding/test/unit/test

Cloned from upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.

// NOTE: Requires testharness.js
// http://www.w3.org/2008/webapps/wiki/Harness

     4 // Extension to testharness.js API which avoids logging enormous strings

     5 // on a coding failure.

     6 function assert_string_equals(actual, expected, description) {

     7   // short circuit success case

     8   if (actual === expected) {

     9     assert_true(true, description + ": <actual> === <expected>");

    10     return;

    11   }

    13   // length check

    14   assert_equals(actual.length, expected.length,

    15                 description + ": string lengths")

    17   var i, a, b;

    18   for (i = 0; i < actual.length; i++) {

    19     a = actual.charCodeAt(i);

    20     b = expected.charCodeAt(i);

    21     if (a !== b)

    22       assert_true(false,

    23                   description +

    24                   ": code unit " + i.toString() + " unequal: " +

    25                   cpname(a) + " != " + cpname(b)); // doesn't return

    26   }

    28   // It should be impossible to get here, because the initial

    29   // comparison failed, so either the length comparison or the

    30   // codeunit-by-codeunit comparison should also fail.

    31   assert_true(false, description + ": failed to detect string difference");

    32 }

    34 // Inspired by:

    35 // http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html

    36 function encode_utf8(string) {

    37   var utf8 = unescape(encodeURIComponent(string));

    38   var octets = new Uint8Array(utf8.length), i;

    39   for (i = 0; i < utf8.length; i += 1) {

    40     octets[i] = utf8.charCodeAt(i);

    41   }

    42   return octets;

    43 }

    45 function decode_utf8(octets) {

    46   var utf8 = String.fromCharCode.apply(null, octets);

    47   return decodeURIComponent(escape(utf8));

    48 }

    50 // Helpers for test_utf_roundtrip.

    51 function cpname(n) {

    52   if (n+0 !== n)

    53     return n.toString();

    54   var w = (n <= 0xFFFF) ? 4 : 6;

    55   return 'U+' + ('000000' + n.toString(16).toUpperCase()).slice(-w);

    56 }

    58 function genblock(from, len) {

    59   var i, j, point, offset;

    60   var size, block;

    62   // determine size required:

    63   //    1 unit   for each point from U+000000 through U+00D7FF

    64   //    0 units                      U+00D800 through U+00DFFF

    65   //    1 unit                       U+00E000 through U+00FFFF

    66   //    2 units                      U+010000 through U+10FFFF

    67   function overlap(min1, max1, min2, max2) {

    68     return Math.max(0, Math.min(max1, max2) - Math.max(min1, min2));

    69   }

    70   size = (overlap(from, from+len, 0x000000, 0x00D800) +

    71           overlap(from, from+len, 0x00E000, 0x010000) +

    72           overlap(from, from+len, 0x010000, 0x110000)*2);

    74   block = new Uint16Array(size);

    75   for (i = 0, j = 0; i < len; i++) {

    76     point = from + i;

    77     if (0xD800 <= point && point <= 0xDFFF)

    78       continue;

    79     else if (point <= 0xFFFF)

    80       block[j++] = point;

    81     else {

    82       offset = point - 0x10000;

    83       block[j++] = 0xD800 + (offset >> 10);

    84       block[j++] = 0xDC00 + (offset & 0x3FF);

    85     }

    86   }

    87   return String.fromCharCode.apply(null, block);

    88 }

    90 function test_utf_roundtrip () {

    91   var MIN_CODEPOINT = 0;

    92   var MAX_CODEPOINT = 0x10FFFF;

    93   var BLOCK_SIZE = 0x1000;

    95   var block, block_tag, i, j, encoded, decoded, exp_encoded, exp_decoded;

    97   var TE_U16LE = new TextEncoder("UTF-16LE");

    98   var TD_U16LE = new TextDecoder("UTF-16LE");

   100   var TE_U16BE = new TextEncoder("UTF-16BE");

   101   var TD_U16BE = new TextDecoder("UTF-16BE");

   103   var TE_U8    = new TextEncoder("UTF-8");

   104   var TD_U8    = new TextDecoder("UTF-8");

   106   for (i = MIN_CODEPOINT; i < MAX_CODEPOINT; i += BLOCK_SIZE) {

   107     block_tag = cpname(i) + " - " + cpname(i + BLOCK_SIZE - 1);

   108     block = genblock(i, BLOCK_SIZE);

   110     // test UTF-16LE, UTF-16BE, and UTF-8 encodings against themselves

   111     encoded = TE_U16LE.encode(block);

   112     decoded = TD_U16LE.decode(encoded);

   113     assert_string_equals(block, decoded, "UTF-16LE round trip " + block_tag);

   115     encoded = TE_U16BE.encode(block);

   116     decoded = TD_U16BE.decode(encoded);

   117     assert_string_equals(block, decoded, "UTF-16BE round trip " + block_tag);

   119     encoded = TE_U8.encode(block);

   120     decoded = TD_U8.decode(encoded);

   121     assert_string_equals(block, decoded, "UTF-8 round trip " + block_tag);

   123     // test TextEncoder(UTF-8) against the older idiom

   124     exp_encoded = encode_utf8(block);

   125     assert_array_equals(encoded, exp_encoded,

   126                         "UTF-8 reference encoding " + block_tag);

   128     exp_decoded = decode_utf8(exp_encoded);

   129     assert_string_equals(decoded, exp_decoded,

   130                          "UTF-8 reference decoding " + block_tag);

   131   }

   132 }

   134 function test_utf_samples () {

   135   // z, cent, CJK water, G-Clef, Private-use character

   136   var sample = "z\xA2\u6C34\uD834\uDD1E\uDBFF\uDFFD";

   137   var cases = [

   138     { encoding: "utf-8",

   139       expected: [0x7A, 0xC2, 0xA2, 0xE6, 0xB0, 0xB4, 0xF0, 0x9D, 0x84, 0x9E, 0xF4, 0x8F, 0xBF, 0xBD] },

   140     { encoding: "utf-16le",

   141       expected: [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF] },

   142     { encoding: "utf-16",

   143       expected: [0x7A, 0x00, 0xA2, 0x00, 0x34, 0x6C, 0x34, 0xD8, 0x1E, 0xDD, 0xFF, 0xDB, 0xFD, 0xDF] },

   144     { encoding: "utf-16be",

   145       expected: [0x00, 0x7A, 0x00, 0xA2, 0x6C, 0x34, 0xD8, 0x34, 0xDD, 0x1E, 0xDB, 0xFF, 0xDF, 0xFD] }

   146   ];

   148   cases.forEach(

   149     function(t) {

   150       var encoded = new TextEncoder(t.encoding).encode(sample);

   151       assert_array_equals(encoded, t.expected,

   152                           "expected equal encodings - " + t.encoding);

   154       var decoded = new TextDecoder(t.encoding)

   155                         .decode(new Uint8Array(t.expected));

   156       assert_equals(decoded, sample,

   157                     "expected equal decodings - " + t.encoding);

   158     });

   159 }

   161 test(test_utf_samples,

   162      "UTF-8, UTF-16LE, UTF-16BE - Encode/Decode - reference sample");

   164 test(test_utf_roundtrip,

   165      "UTF-8, UTF-16LE, UTF-16BE - Encode/Decode - full roundtrip and "+

   166      "agreement with encode/decodeURIComponent");

The Tor Browser / file revision