intl/uconv/tests/unit/test_bug340714.js

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/tests/unit/test_bug340714.js	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,103 @@
     1.4 +/* Test case for bug 340714
     1.5 + *
     1.6 + * Uses nsIConverterInputStream to decode UTF-16 text with all combinations
     1.7 + * of UTF-16BE and UTF-16LE with and without BOM.
     1.8 + *
     1.9 + * Sample text is: "Все счастливые семьи похожи друг на друга, каждая несчастливая семья несчастлива по-своему."
    1.10 + *
    1.11 + * The enclosing quotation marks are included in the sample text to test that
    1.12 + * UTF-16LE is recognized even when there is no BOM and the UTF-16LE decoder is
    1.13 + * not explicitly called. This only works when the first character of the text
    1.14 + * is an eight-bit character.
    1.15 + */
    1.16 +
    1.17 +const beBOM="%FE%FF";
    1.18 +const leBOM="%FF%FE";
    1.19 +const sampleUTF16BE="%00%22%04%12%04%41%04%35%00%20%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%00%20%04%41%04%35%04%3C%04%4C%04%38%00%20%04%3F%04%3E%04%45%04%3E%04%36%04%38%00%20%04%34%04%40%04%43%04%33%00%20%04%3D%04%30%00%20%04%34%04%40%04%43%04%33%04%30%00%2C%00%20%04%3A%04%30%04%36%04%34%04%30%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%00%20%04%41%04%35%04%3C%04%4C%04%4F%00%20%04%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%00%20%04%3F%04%3E%00%2D%04%41%04%32%04%3E%04%35%04%3C%04%43%00%2E%00%22";
    1.20 +const sampleUTF16LE="%22%00%12%04%41%04%35%04%20%00%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%4B%04%35%04%20%00%41%04%35%04%3C%04%4C%04%38%04%20%00%3F%04%3E%04%45%04%3E%04%36%04%38%04%20%00%34%04%40%04%43%04%33%04%20%00%3D%04%30%04%20%00%34%04%40%04%43%04%33%04%30%04%2C%00%20%00%3A%04%30%04%36%04%34%04%30%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%4F%04%20%00%41%04%35%04%3C%04%4C%04%4F%04%20%00%3D%04%35%04%41%04%47%04%30%04%41%04%42%04%3B%04%38%04%32%04%30%04%20%00%3F%04%3E%04%2D%00%41%04%32%04%3E%04%35%04%3C%04%43%04%2E%00%22%00";
    1.21 +const expected = "\"\u0412\u0441\u0435 \u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u044B\u0435 \u0441\u0435\u043C\u044C\u0438 \u043F\u043E\u0445\u043E\u0436\u0438 \u0434\u0440\u0443\u0433 \u043D\u0430 \u0434\u0440\u0443\u0433\u0430, \u043A\u0430\u0436\u0434\u0430\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430\u044F \u0441\u0435\u043C\u044C\u044F \u043D\u0435\u0441\u0447\u0430\u0441\u0442\u043B\u0438\u0432\u0430 \u043F\u043E-\u0441\u0432\u043E\u0435\u043C\u0443.\""; 
    1.22 +
    1.23 +function makeText(withBOM, charset)
    1.24 +{
    1.25 +  var theText = eval("sample" + charset);
    1.26 +  if (withBOM) {
    1.27 +    if (charset == "UTF16BE") {
    1.28 +      theText = beBOM + theText;
    1.29 +    } else {
    1.30 +      theText = leBOM + theText;
    1.31 +    }
    1.32 +  }
    1.33 +  return theText;
    1.34 +}
    1.35 +
    1.36 +function testCase(withBOM, charset, charsetDec, decoder, bufferLength)
    1.37 +{
    1.38 +  var dataURI = "data:text/plain;charset=" + charsetDec + "," +
    1.39 +                 makeText(withBOM, charset);
    1.40 +
    1.41 +  var IOService = Components.Constructor("@mozilla.org/network/io-service;1",
    1.42 +					 "nsIIOService");
    1.43 +  var ConverterInputStream =
    1.44 +      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
    1.45 +			     "nsIConverterInputStream",
    1.46 +			     "init");
    1.47 +
    1.48 +  var ios = new IOService();
    1.49 +  var channel = ios.newChannel(dataURI, "", null);
    1.50 +  var testInputStream = channel.open();
    1.51 +  var testConverter = new ConverterInputStream(testInputStream,
    1.52 +					       decoder,
    1.53 +					       bufferLength,
    1.54 +					       0xFFFD);
    1.55 +
    1.56 +  if (!(testConverter instanceof
    1.57 +	Components.interfaces.nsIUnicharLineInputStream))
    1.58 +      throw "not line input stream";
    1.59 +
    1.60 +  var outStr = "";
    1.61 +  var more;
    1.62 +  do {
    1.63 +      // read the line and check for eof
    1.64 +      var line = {};
    1.65 +      more = testConverter.readLine(line);
    1.66 +      outStr += line.value;
    1.67 +  } while (more);
    1.68 +
    1.69 +  if (outStr != expected) {
    1.70 +    dump("Failed with BOM = " + withBOM + "; charset = " + charset +
    1.71 +	 "; charset declaration = " + charsetDec + "; decoder = " + decoder +
    1.72 +	 "; bufferLength = " + bufferLength + "\n");
    1.73 +    if (outStr.length == expected.length) {
    1.74 +      for (i = 0; i < outStr.length; ++i) {
    1.75 +	if (outStr.charCodeAt(i) != expected.charCodeAt(i)) {
    1.76 +	  dump(i + ": " + outStr.charCodeAt(i).toString(16) + " != " + expected.charCodeAt(i).toString(16) + "\n");
    1.77 +	}
    1.78 +      }
    1.79 +    }
    1.80 +  }
    1.81 +
    1.82 +  // escape the strings before comparing for better readability
    1.83 +  do_check_eq(escape(outStr), escape(expected));
    1.84 +}
    1.85 +
    1.86 +function run_test()
    1.87 +{
    1.88 +    /*       BOM    charset    charset   decoder     buffer
    1.89 +                               declaration           length */
    1.90 +    testCase(true,  "UTF16LE", "UTF-16", "UTF-16",   64);
    1.91 +    testCase(true,  "UTF16BE", "UTF-16", "UTF-16",   64);
    1.92 +    testCase(true,  "UTF16LE", "UTF-16", "UTF-16LE", 64);
    1.93 +    testCase(true,  "UTF16BE", "UTF-16", "UTF-16BE", 64);
    1.94 +    testCase(false, "UTF16LE", "UTF-16", "UTF-16",   64);
    1.95 +    testCase(false, "UTF16BE", "UTF-16", "UTF-16",   64);
    1.96 +    testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 64);
    1.97 +    testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 64);
    1.98 +    testCase(true,  "UTF16LE", "UTF-16", "UTF-16",   65);
    1.99 +    testCase(true,  "UTF16BE", "UTF-16", "UTF-16",   65);
   1.100 +    testCase(true,  "UTF16LE", "UTF-16", "UTF-16LE", 65);
   1.101 +    testCase(true,  "UTF16BE", "UTF-16", "UTF-16BE", 65);
   1.102 +    testCase(false, "UTF16LE", "UTF-16", "UTF-16",   65);
   1.103 +    testCase(false, "UTF16BE", "UTF-16", "UTF-16",   65);
   1.104 +    testCase(false, "UTF16LE", "UTF-16", "UTF-16LE", 65);
   1.105 +    testCase(false, "UTF16BE", "UTF-16", "UTF-16BE", 65);
   1.106 +}

mercurial