michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* michael@0: * michael@0: * Date: 15 July 2002 michael@0: * SUMMARY: Testing identifiers with double-byte names michael@0: * See http://bugzilla.mozilla.org/show_bug.cgi?id=58274 michael@0: * michael@0: * Here is a sample of the problem: michael@0: * michael@0: * js> function f\u02B1 () {} michael@0: * michael@0: * js> f\u02B1.toSource(); michael@0: * function f¦() {} michael@0: * michael@0: * js> f\u02B1.toSource().toSource(); michael@0: * (new String("function f\xB1() {}")) michael@0: * michael@0: * michael@0: * See how the high-byte information (the 02) has been lost? michael@0: * The same thing was happening with the toString() method: michael@0: * michael@0: * js> f\u02B1.toString(); michael@0: * michael@0: * function f¦() { michael@0: * } michael@0: * michael@0: * js> f\u02B1.toString().toSource(); michael@0: * (new String("\nfunction f\xB1() {\n}\n")) michael@0: * michael@0: */ michael@0: //----------------------------------------------------------------------------- michael@0: var UBound = 0; michael@0: var BUGNUMBER = 58274; michael@0: var summary = 'Testing identifiers with double-byte names'; michael@0: var status = ''; michael@0: var statusitems = []; michael@0: var actual = ''; michael@0: var actualvalues = []; michael@0: var expect= ''; michael@0: var expectedvalues = []; michael@0: michael@0: michael@0: /* michael@0: * Define a function that uses double-byte identifiers in michael@0: * "every possible way" michael@0: * michael@0: * Then recover each double-byte identifier via f.toString(). michael@0: * To make this easier, put a 'Z' token before every one. michael@0: * michael@0: * Our eval string will be: michael@0: * michael@0: * sEval = "function Z\u02b1(Z\u02b2, b) { michael@0: * try { Z\u02b3 : var Z\u02b4 = Z\u02b1; } michael@0: * catch (Z\u02b5) { for (var Z\u02b6 in Z\u02b5) michael@0: * {for (1; 1<0; Z\u02b7++) {new Array()[Z\u02b6] = 1;} };} }"; michael@0: * michael@0: * It will be helpful to build this string in stages: michael@0: */ michael@0: var s0 = 'function Z'; michael@0: var s1 = '\u02b1(Z'; michael@0: var s2 = '\u02b2, b) {try { Z'; michael@0: var s3 = '\u02b3 : var Z'; michael@0: var s4 = '\u02b4 = Z'; michael@0: var s5 = '\u02b1; } catch (Z' michael@0: var s6 = '\u02b5) { for (var Z'; michael@0: var s7 = '\u02b6 in Z'; michael@0: var s8 = '\u02b5){for (1; 1<0; Z'; michael@0: var s9 = '\u02b7++) {new Array()[Z'; michael@0: var s10 = '\u02b6] = 1;} };} }'; michael@0: michael@0: michael@0: /* michael@0: * Concatenate these and eval() to create the function Z\u02b1 michael@0: */ michael@0: var sEval = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10; michael@0: eval(sEval); michael@0: michael@0: michael@0: /* michael@0: * Recover all the double-byte identifiers via Z\u02b1.toString(). michael@0: * We'll recover the 1st one as arrID[1], the 2nd one as arrID[2], michael@0: * and so on ... michael@0: */ michael@0: var arrID = getIdentifiers(Z\u02b1); michael@0: michael@0: michael@0: /* michael@0: * Now check that we got back what we put in - michael@0: */ michael@0: status = inSection(1); michael@0: actual = arrID[1]; michael@0: expect = s1.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(2); michael@0: actual = arrID[2]; michael@0: expect = s2.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(3); michael@0: actual = arrID[3]; michael@0: expect = s3.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(4); michael@0: actual = arrID[4]; michael@0: expect = s4.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(5); michael@0: actual = arrID[5]; michael@0: expect = s5.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(6); michael@0: actual = arrID[6]; michael@0: expect = s6.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(7); michael@0: actual = arrID[7]; michael@0: expect = s7.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(8); michael@0: actual = arrID[8]; michael@0: expect = s8.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(9); michael@0: actual = arrID[9]; michael@0: expect = s9.charAt(0); michael@0: addThis(); michael@0: michael@0: status = inSection(10); michael@0: actual = arrID[10]; michael@0: expect = s10.charAt(0); michael@0: addThis(); michael@0: michael@0: michael@0: michael@0: michael@0: //----------------------------------------------------------------------------- michael@0: test(); michael@0: //----------------------------------------------------------------------------- michael@0: michael@0: michael@0: michael@0: /* michael@0: * Goal: recover the double-byte identifiers from f.toString() michael@0: * by getting the very next character after each 'Z' token. michael@0: * michael@0: * The return value will be an array |arr| indexed such that michael@0: * |arr[1]| is the 1st identifier, |arr[2]| the 2nd, and so on. michael@0: * michael@0: * Note, however, f.toString() is implementation-independent. michael@0: * For example, it may begin with '\nfunction' instead of 'function'. michael@0: * michael@0: * Rhino uses a Unicode representation for f.toString(); whereas michael@0: * SpiderMonkey uses an ASCII representation, putting escape sequences michael@0: * for non-ASCII characters. For example, if a function is called f\u02B1, michael@0: * then in Rhino the toString() method will present a 2-character Unicode michael@0: * string for its name, whereas SpiderMonkey will present a 7-character michael@0: * ASCII string for its name: the string literal 'f\u02B1'. michael@0: * michael@0: * So we force the lexer to condense the string before we use it. michael@0: * This will give uniform results in Rhino and SpiderMonkey. michael@0: */ michael@0: function getIdentifiers(f) michael@0: { michael@0: var str = condenseStr(f.toString()); michael@0: var arr = str.split('Z'); michael@0: michael@0: /* michael@0: * The identifiers are the 1st char of each split substring michael@0: * EXCEPT the first one, which is just ('\n' +) 'function '. michael@0: * michael@0: * Thus note the 1st identifier will be stored in |arr[1]|, michael@0: * the 2nd one in |arr[2]|, etc., making the indexing easy - michael@0: */ michael@0: for (i in arr) michael@0: arr[i] = arr[i].charAt(0); michael@0: return arr; michael@0: } michael@0: michael@0: michael@0: /* michael@0: * This function is the opposite of a functions like escape(), which take michael@0: * Unicode characters and return escape sequences for them. Here, we force michael@0: * the lexer to turn escape sequences back into single characters. michael@0: * michael@0: * Note we can't simply do |eval(str)|, since in practice |str| will be an michael@0: * identifier somewhere in the program (e.g. a function name); thus |eval(str)| michael@0: * would return the object that the identifier represents: not what we want. michael@0: * michael@0: * So we surround |str| lexicographically with quotes to force the lexer to michael@0: * evaluate it as a string. Have to strip out any linefeeds first, however - michael@0: */ michael@0: function condenseStr(str) michael@0: { michael@0: /* michael@0: * You won't be able to do the next step if |str| has michael@0: * any carriage returns or linefeeds in it. For example: michael@0: * michael@0: * js> eval("'" + '\nHello' + "'"); michael@0: * 1: SyntaxError: unterminated string literal: michael@0: * 1: ' michael@0: * 1: ^ michael@0: * michael@0: * So replace them with the empty string - michael@0: */ michael@0: str = str.replace(/[\r\n]/g, '') michael@0: return eval("'" + str + "'") michael@0: } michael@0: michael@0: michael@0: function addThis() michael@0: { michael@0: statusitems[UBound] = status; michael@0: actualvalues[UBound] = actual; michael@0: expectedvalues[UBound] = expect; michael@0: UBound++; michael@0: } michael@0: michael@0: michael@0: function test() michael@0: { michael@0: enterFunc('test'); michael@0: printBugNumber(BUGNUMBER); michael@0: printStatus(summary); michael@0: michael@0: for (var i=0; i