/* Test case for bug 317216
 *
 * Uses nsIConverterInputStream to decode UTF-16 text containing valid
 * surrogate pairs and lone surrogate characters.
 *
 * The sample character is U+1D420 (MATHEMATICAL BOLD SMALL G), whose UTF-16
 * encoding is the surrogate pair D835 DC20.
 *
 * The test uses buffers of 4 different lengths so that the end of the buffer
 * falls in the middle of a UTF-16 code unit and in the middle of a surrogate
 * pair.
 */

// Test vectors. Each entry is a pair:
//   [0] the input as %-encoded UTF-16BE bytes (run_test byte-swaps it to get
//       the UTF-16LE variant),
//   [1] the string the decoder is expected to produce.
const test = [
  // 0: Valid surrogate pair
  //    expected: surrogate pair
  ["%D8%35%DC%20%00%2D%00%2D",
   "\uD835\uDC20--"],
  // 1: Lone high surrogate
  //    expected: one replacement char
  ["%D8%35%00%2D%00%2D",
   "\uFFFD--"],
  // 2: Lone low surrogate
  //    expected: one replacement char
  ["%DC%20%00%2D%00%2D",
   "\uFFFD--"],
  // 3: Two high surrogates
  //    expected: two replacement chars
  ["%D8%35%D8%35%00%2D%00%2D",
   "\uFFFD\uFFFD--"],
  // 4: Two low surrogates
  //    expected: two replacement chars
  ["%DC%20%DC%20%00%2D%00%2D",
   "\uFFFD\uFFFD--"],
  // 5: Low surrogate followed by high surrogate
  //    expected: two replacement chars
  ["%DC%20%D8%35%00%2D%00%2D",
   "\uFFFD\uFFFD--"],
  // 6: Lone high surrogate followed by valid surrogate pair
  //    expected: replacement char followed by surrogate pair
  ["%D8%35%D8%35%DC%20%00%2D%00%2D",
   "\uFFFD\uD835\uDC20--"],
  // 7: Lone low surrogate followed by valid surrogate pair
  //    expected: replacement char followed by surrogate pair
  ["%DC%20%D8%35%DC%20%00%2D%00%2D",
   "\uFFFD\uD835\uDC20--"],
  // 8: Valid surrogate pair followed by lone high surrogate
  //    expected: surrogate pair followed by replacement char
  ["%D8%35%DC%20%D8%35%00%2D%00%2D",
   "\uD835\uDC20\uFFFD--"],
  // 9: Valid surrogate pair followed by lone low surrogate
  //    expected: surrogate pair followed by replacement char
  ["%D8%35%DC%20%DC%20%00%2D%00%2D",
   "\uD835\uDC20\uFFFD--"],
  // 10: Lone high surrogate at the end of the input
  //    expected: nothing
  // NOTE(review): the input ends with a bare "%" that is not a complete %XX
  // escape; kept as-is -- confirm whether it is intentional.
  ["%D8%35%",
   ""],
  // 11: Half code unit at the end of the input
  //    expected: nothing
  ["%D8",
   ""]
];

// XPCOM constructor for the I/O service, used below to open data: URIs.
const IOService = Components.Constructor("@mozilla.org/network/io-service;1",
                                         "nsIIOService");
// XPCOM constructor for the converter stream under test. The third argument
// names the initializer, so `new ConverterInputStream(a, b, c, d)` is expected
// to call init(a, b, c, d) on the new instance.
const ConverterInputStream =
      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                             "nsIConverterInputStream",
                             "init");
// Single I/O service instance shared by every testCase() call.
const ios = new IOService();

/**
 * Decodes one %-encoded input through a ConverterInputStream and checks the
 * decoded text against the expectation.
 *
 * The input is wrapped in a data: URI, opened via the I/O service, and read
 * line by line; all lines are concatenated before comparing.
 *
 * @param {string} testText      %-encoded bytes placed in the data: URI body.
 * @param {string} expectedText  String the decoder should produce.
 * @param {number} bufferLength  Buffer size handed to the converter stream.
 * @param {string} charset       Charset label used both in the data: URI and
 *                               for the converter (e.g. "UTF-16BE").
 * @throws {Error} If the converter does not implement
 *                 nsIUnicharLineInputStream.
 */
function testCase(testText, expectedText, bufferLength, charset)
{
  var dataURI = "data:text/plain;charset=" + charset + "," + testText;

  var channel = ios.newChannel(dataURI, "", null);
  var testInputStream = channel.open();
  // 0xFFFD (REPLACEMENT CHARACTER) is passed as the last init() argument.
  var testConverter = new ConverterInputStream(testInputStream,
                                               charset,
                                               bufferLength,
                                               0xFFFD);

  var outStr = "";
  try {
    if (!(testConverter instanceof
          Components.interfaces.nsIUnicharLineInputStream))
      throw new Error("not line input stream");

    var more;
    do {
      // readLine() fills line.value and returns false once the input is
      // exhausted.
      var line = {};
      more = testConverter.readLine(line);
      outStr += line.value;
    } while (more);
  } finally {
    // Always release the stream, even when the check above throws.
    testConverter.close();
  }

  // escape the strings before comparing for better readability
  do_check_eq(escape(outStr), escape(expectedText));
}

// Add 32 dummy characters to the test text to work around the minimum buffer
// size of an ns*Buffer
const MINIMUM_BUFFER_SIZE = 32;

/**
 * Prefixes %-encoded input with MINIMUM_BUFFER_SIZE copies of "%00%2D"
 * (one "-" in UTF-16BE each).
 *
 * @param {string} str  %-encoded input bytes.
 * @returns {string} The padding followed by str.
 */
function padBytes(str)
{
  // Joining N+1 empty slots yields exactly N copies of the separator.
  return new Array(MINIMUM_BUFFER_SIZE + 1).join("%00%2D") + str;
}

/**
 * Prefixes an expected string with MINIMUM_BUFFER_SIZE "-" characters, the
 * decoded counterpart of the byte padding added to the input.
 *
 * @param {string} str  Expected decoded text.
 * @returns {string} The padding followed by str.
 */
function padUnichars(str)
{
  // Joining N+1 empty slots yields exactly N copies of the separator.
  return new Array(MINIMUM_BUFFER_SIZE + 1).join("-") + str;
}

/**
 * Byte-swap %-encoded utf-16: every consecutive pair of three-character
 * "%.." groups is exchanged, turning UTF-16BE input into UTF-16LE (and back).
 * Anything left over after the last complete pair (a single "%XX" or a
 * dangling "%") is kept in place unchanged.
 *
 * @param {string} str  %-encoded bytes.
 * @returns {string} The same bytes with each pair swapped.
 */
function flip(str)
{
  return str.replace(/(%..)(%..)/g, "$2$1");
}

/**
 * Test entry point: runs every vector in `test` as both UTF-16BE and
 * UTF-16LE, with four consecutive buffer lengths starting at
 * MINIMUM_BUFFER_SIZE.
 */
function run_test()
{
  // Iterate over the table's actual length so added or removed vectors are
  // picked up automatically.
  for (var i = 0; i < test.length; ++i) {
    // These do not depend on the buffer length, so build them once per vector.
    var testText = padBytes(test[i][0]);
    var expectedText = padUnichars(test[i][1]);
    var flippedText = flip(testText);

    for (var bufferLength = MINIMUM_BUFFER_SIZE;
         bufferLength < MINIMUM_BUFFER_SIZE + 4;
         ++bufferLength) {
      testCase(testText, expectedText, bufferLength, "UTF-16BE");
      testCase(flippedText, expectedText, bufferLength, "UTF-16LE");
    }
  }
}