/* Test case for bug 317216
 *
 * Uses nsIConverterInputStream to decode UTF-16 text with valid surrogate
 * pairs and lone surrogate characters.
 *
 * The sample supplementary character is U+1D420, i.e. the UTF-16 surrogate
 * pair D835 DC20 that appears in every entry of the table below.
 *
 * The test uses buffers of 4 different lengths to test end of buffer in mid-
 * UTF16 character and mid-surrogate pair.
 */

// Each entry is [input, expected]:
//   input    - %-escaped UTF-16BE bytes, placed in a data: URI by testCase()
//   expected - the string the converter is expected to produce
const test = [
// 0: Valid surrogate pair
     ["%D8%35%DC%20%00%2D%00%2D",
//    expected: surrogate pair
      "\uD835\uDC20--"],
// 1: Lone high surrogate
     ["%D8%35%00%2D%00%2D",
//    expected: one replacement char
      "\uFFFD--"],
// 2: Lone low surrogate
     ["%DC%20%00%2D%00%2D",
//    expected: one replacement char
      "\uFFFD--"],
// 3: Two high surrogates
     ["%D8%35%D8%35%00%2D%00%2D",
//    expected: two replacement chars
      "\uFFFD\uFFFD--"],
// 4: Two low surrogates
     ["%DC%20%DC%20%00%2D%00%2D",
//    expected: two replacement chars
      "\uFFFD\uFFFD--"],
// 5: Low surrogate followed by high surrogate
     ["%DC%20%D8%35%00%2D%00%2D",
//    expected: two replacement chars
      "\uFFFD\uFFFD--"],
// 6: Lone high surrogate followed by valid surrogate pair
     ["%D8%35%D8%35%DC%20%00%2D%00%2D",
//    expected: replacement char followed by surrogate pair
      "\uFFFD\uD835\uDC20--"],
// 7: Lone low surrogate followed by valid surrogate pair
     ["%DC%20%D8%35%DC%20%00%2D%00%2D",
//    expected: replacement char followed by surrogate pair
      "\uFFFD\uD835\uDC20--"],
// 8: Valid surrogate pair followed by lone high surrogate
     ["%D8%35%DC%20%D8%35%00%2D%00%2D",
//    expected: surrogate pair followed by replacement char
      "\uD835\uDC20\uFFFD--"],
// 9: Valid surrogate pair followed by lone low surrogate
     ["%D8%35%DC%20%DC%20%00%2D%00%2D",
//    expected: surrogate pair followed by replacement char
      "\uD835\uDC20\uFFFD--"],
// 10: Lone high surrogate at the end of the input
     ["%D8%35%",
//    expected: nothing
      ""],
// 11: Half code unit at the end of the input
     ["%D8",
//    expected: nothing
      ""]];

const IOService = Components.Constructor("@mozilla.org/network/io-service;1",
                                         "nsIIOService");
const ConverterInputStream =
      Components.Constructor("@mozilla.org/intl/converter-input-stream;1",
                             "nsIConverterInputStream",
                             "init");
const ios = new IOService();

/**
 * Decode one %-escaped input through a converter input stream and compare the
 * result with the expected string.
 *
 * @param testText      %-escaped bytes, appended to a data: URI
 * @param expectedText  string the decoded output must equal
 * @param bufferLength  buffer size handed to the converter stream
 * @param charset       charset label used both in the data: URI and for the
 *                      converter ("UTF-16BE" or "UTF-16LE" in this test)
 * @throws the string "not line input stream" if the converter does not expose
 *         nsIUnicharLineInputStream
 */
function testCase(testText, expectedText, bufferLength, charset)
{
  var dataURI = "data:text/plain;charset=" + charset + "," + testText;

  var channel = ios.newChannel(dataURI, "", null);
  var testInputStream = channel.open();
  // 0xFFFD is U+FFFD, the character the expected strings use wherever a lone
  // surrogate appears in the input.
  var testConverter = new ConverterInputStream(testInputStream,
                                               charset,
                                               bufferLength,
                                               0xFFFD);

  if (!(testConverter instanceof
        Components.interfaces.nsIUnicharLineInputStream))
    throw "not line input stream";

  var outStr = "";
  var more;
  do {
    // read the line and check for eof
    var line = {};
    more = testConverter.readLine(line);
    outStr += line.value;
  } while (more);

  // escape the strings before comparing for better readability
  do_check_eq(escape(outStr), escape(expectedText));
}

// Add 32 dummy characters to the test text to work around the minimum buffer
// size of an ns*Buffer
const MINIMUM_BUFFER_SIZE=32;

/**
 * Prefix a %-escaped UTF-16BE byte string with MINIMUM_BUFFER_SIZE copies of
 * "%00%2D" (the bytes 00 2D, i.e. "-" in UTF-16BE).
 */
function padBytes(str)
{
  var padding = "";
  for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) {
    padding += "%00%2D";
  }
  return padding + str;
}

/**
 * Prefix an expected string with MINIMUM_BUFFER_SIZE "-" characters, matching
 * the padding that padBytes() adds to the input.
 */
function padUnichars(str)
{
  var padding = "";
  for (var i = 0; i < MINIMUM_BUFFER_SIZE; ++i) {
    padding += "-";
  }
  return padding + str;
}

// Byte-swap %-encoded utf-16: exchanges the two %XX escapes of each
// consecutive pair; a trailing unpaired fragment is left untouched.
function flip(str) { return str.replace(/(%..)(%..)/g, "$2$1"); }

/**
 * Run every entry of the test table through the converter, as UTF-16BE and
 * (byte-swapped) as UTF-16LE, at four consecutive buffer lengths starting at
 * MINIMUM_BUFFER_SIZE.
 */
function run_test()
{
  // Iterate over the table's real length so that adding or removing a case
  // cannot leave the loop bound out of date.
  for (var i = 0; i < test.length; ++i) {
    // The padded strings depend only on the test case, not on the buffer
    // length, so build them once per case.
    var testText = padBytes(test[i][0]);
    var expectedText = padUnichars(test[i][1]);
    for (var bufferLength = MINIMUM_BUFFER_SIZE;
         bufferLength < MINIMUM_BUFFER_SIZE + 4;
         ++ bufferLength) {
      testCase(testText, expectedText, bufferLength, "UTF-16BE");
      testCase(flip(testText), expectedText, bufferLength, "UTF-16LE");
    }
  }
}