intl/uconv/tests/unit/test_utf8_illegals.js

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/intl/uconv/tests/unit/test_utf8_illegals.js	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,136 @@
     1.4 +// Tests illegal UTF-8 sequences
     1.5 +
     1.6 +const Cc = Components.Constructor;
     1.7 +const Ci = Components.interfaces;
     1.8 +
     1.9 +const tests = [
    1.10 +{ inStrings: ["%80",                 // Illegal or incomplete sequences
    1.11 +              "%8f",
    1.12 +              "%90",
    1.13 +              "%9f",
    1.14 +              "%a0",
    1.15 +              "%bf",
    1.16 +              "%c0",
    1.17 +              "%c1",
    1.18 +              "%c2",
    1.19 +              "%df",
    1.20 +              "%e0",
    1.21 +              "%e0%a0",
    1.22 +              "%e0%bf",
    1.23 +              "%ed%80",
    1.24 +              "%ed%9f",
    1.25 +              "%ef",
    1.26 +              "%ef%bf",
    1.27 +              "%f0",
    1.28 +              "%f0%90",
    1.29 +              "%f0%90%80",
    1.30 +              "%f0%90%bf",
    1.31 +              "%f0%bf",
    1.32 +              "%f0%bf%80",
    1.33 +              "%f0%bf%bf",
    1.34 +              "%f4",
    1.35 +              "%f4%80",
    1.36 +              "%f4%80%80",
    1.37 +              "%f4%80%bf",
    1.38 +              "%f4%8f",
    1.39 +              "%f4%8f%80",
    1.40 +              "%f4%8f%bf",
    1.41 +              "%f5",
    1.42 +              "%f7",
    1.43 +              "%f8",
    1.44 +              "%fb",
    1.45 +              "%fc",
    1.46 +              "%fd"],
    1.47 +  expected: "ABC\ufffdXYZ" },
    1.48 +
    1.49 +{ inStrings: ["%c0%af",              // Illegal bytes in 2-octet
    1.50 +              "%c1%af"],             //  sequences
    1.51 +  expected: "ABC\ufffd\ufffdXYZ" },
    1.52 +
    1.53 +{ inStrings: ["%e0%80%80",           // Illegal bytes in 3-octet
    1.54 +              "%e0%80%af",           //  sequences
    1.55 +              "%e0%9f%bf",
    1.56 +                                     // long surrogates
    1.57 +              "%ed%a0%80",           // D800
    1.58 +              "%ed%ad%bf",           // DB7F
    1.59 +              "%ed%ae%80",           // DB80
    1.60 +              "%ed%af%bf",           // DBFF
    1.61 +              "%ed%b0%80",           // DC00
    1.62 +              "%ed%be%80",           // DF80
    1.63 +              "%ed%bf%bf"],          // DFFF
    1.64 +  expected: "ABC\ufffd\ufffd\ufffdXYZ" },
    1.65 +
    1.66 +{ inStrings: ["%f0%80%80%80",        // Illegal bytes in 4-octet
    1.67 +              "%f0%80%80%af",        //  sequences
    1.68 +              "%f0%8f%bf%bf",
    1.69 +              "%f4%90%80%80",
    1.70 +              "%f4%bf%bf%bf",
    1.71 +              "%f5%80%80%80",
    1.72 +              "%f7%bf%bf%bf"],
    1.73 +  expected: "ABC\ufffd\ufffd\ufffd\ufffdXYZ" },
    1.74 +
    1.75 +{ inStrings: ["%f8%80%80%80%80",     // Illegal bytes in 5-octet
    1.76 +              "%f8%80%80%80%af",     //  sequences
    1.77 +              "%fb%bf%bf%bf%bf"],
    1.78 +  expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffdXYZ" },
    1.79 +
    1.80 +                                     // Surrogate pairs
    1.81 +{ inStrings: ["%ed%a0%80%ed%b0%80",  // D800 DC00
    1.82 +              "%ed%a0%80%ed%bf%bf",  // D800 DFFF
    1.83 +              "%ed%ad%bf%ed%b0%80",  // DB7F DC00
    1.84 +              "%ed%ad%bf%ed%bf%bf",  // DB7F DFFF
    1.85 +              "%ed%ae%80%ed%b0%80",  // DB80 DC00
    1.86 +              "%ed%ae%80%ed%bf%bf",  // DB80 DFFF
    1.87 +              "%ed%af%bf%ed%b0%80",  // DBFF DC00
    1.88 +              "%ed%ad%bf%ed%bf%bf",  // DBFF DFFF
    1.89 +              "%fc%80%80%80%80%80",  // Illegal bytes in 6-octet
    1.90 +              "%fc%80%80%80%80%af",  //  sequences
    1.91 +              "%fd%bf%bf%bf%bf%bf"],
    1.92 +  expected: "ABC\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdXYZ" },
    1.93 +];
    1.94 +
    1.95 +
    1.96 +function testCaseInputStream(inStr, expected)
    1.97 +{
    1.98 +  var dataURI = "data:text/plain; charset=UTF-8,ABC" + inStr + "XYZ"
    1.99 +  dump(inStr + "==>");
   1.100 +
   1.101 +  var IOService = Cc("@mozilla.org/network/io-service;1",
   1.102 +		     "nsIIOService");
   1.103 +  var ConverterInputStream =
   1.104 +      Cc("@mozilla.org/intl/converter-input-stream;1",
   1.105 +	 "nsIConverterInputStream",
   1.106 +	 "init");
   1.107 +
   1.108 +  var ios = new IOService();
   1.109 +  var channel = ios.newChannel(dataURI, "", null);
   1.110 +  var testInputStream = channel.open();
   1.111 +  var testConverter = new ConverterInputStream(testInputStream,
   1.112 +					       "UTF-8",
   1.113 +					       16,
   1.114 +					       0xFFFD);
   1.115 +
   1.116 +  if (!(testConverter instanceof Ci.nsIUnicharLineInputStream))
   1.117 +      throw "not line input stream";
   1.118 +
   1.119 +  var outStr = "";
   1.120 +  var more;
   1.121 +  do {
   1.122 +      // read the line and check for eof
   1.123 +      var line = {};
   1.124 +      more = testConverter.readLine(line);
   1.125 +      outStr += line.value;
   1.126 +  } while (more);
   1.127 +
   1.128 +  dump(outStr + "; expected=" + expected + "\n");
   1.129 +  do_check_eq(outStr, expected);
   1.130 +  do_check_eq(outStr.length, expected.length);
   1.131 +}
   1.132 +
   1.133 +function run_test() {
   1.134 +  for (var t of tests) {
   1.135 +    for (var inStr of t.inStrings) {
   1.136 +      testCaseInputStream(inStr, t.expected);
   1.137 +    }
   1.138 +  }
   1.139 +}

mercurial