/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/*
 *
 * Date:    15 July 2002
 * SUMMARY: Testing identifiers with double-byte names
 * See http://bugzilla.mozilla.org/show_bug.cgi?id=58274
 *
 * Here is a sample of the problem:
 *
 *    js> function f\u02B1 () {}
 *
 *    js> f\u02B1.toSource();
 *    function f¦() {}
 *
 *    js> f\u02B1.toSource().toSource();
 *    (new String("function f\xB1() {}"))
 *
 *
 * See how the high-byte information (the 02) has been lost?
 * The same thing was happening with the toString() method:
 *
 *    js> f\u02B1.toString();
 *
 *    function f¦() {
 *    }
 *
 *    js> f\u02B1.toString().toSource();
 *    (new String("\nfunction f\xB1() {\n}\n"))
 *
 */
//-----------------------------------------------------------------------------
/*
 * Result bookkeeping: addThis() appends the current |status|, |actual| and
 * |expect| to the three parallel arrays at index |UBound|; test() reports
 * every recorded triple.
 */
var UBound = 0;
var BUGNUMBER = 58274;
var summary = 'Testing identifiers with double-byte names';
var status = '';
var statusitems = [];
var actual = '';
var actualvalues = [];
var expect= '';
var expectedvalues = [];


/*
 * Define a function that uses double-byte identifiers in
 * "every possible way"
 *
 * Then recover each double-byte identifier via f.toString().
 * To make this easier, put a 'Z' token before every one.
 *
 * Our eval string will be:
 *
 * sEval = "function Z\u02b1(Z\u02b2, b) {
 *          try { Z\u02b3 : var Z\u02b4 = Z\u02b1; }
 *          catch (Z\u02b5) { for (var Z\u02b6 in Z\u02b5)
 *          {for (1; 1<0; Z\u02b7++) {new Array()[Z\u02b6] = 1;} };} }";
 *
 * It will be helpful to build this string in stages:
 *
 * Every piece s1..s10 BEGINS with the double-byte character that follows a
 * 'Z' token, so sN.charAt(0) is the character we expect to recover as the
 * Nth identifier.
 */
var s0 =  'function Z';
var s1 =  '\u02b1(Z';
var s2 =  '\u02b2, b) {try { Z';
var s3 =  '\u02b3 : var Z';
var s4 =  '\u02b4 = Z';
var s5 =  '\u02b1; } catch (Z';
var s6 =  '\u02b5) { for (var Z';
var s7 =  '\u02b6 in Z';
var s8 =  '\u02b5){for (1; 1<0; Z';
var s9 =  '\u02b7++) {new Array()[Z';
var s10 = '\u02b6] = 1;} };} }';


/*
 * Concatenate these and eval() to create the function Z\u02b1
 * (the eval'd declaration is what makes the identifier Z\u02b1 used
 * below resolvable).
 */
var sEval = s0 + s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9 + s10;
eval(sEval);


/*
 * Recover all the double-byte identifiers via Z\u02b1.toString().
 * We'll recover the 1st one as arrID[1], the 2nd one as arrID[2],
 * and so on ...
 */
var arrID = getIdentifiers(Z\u02b1);


/*
 * Now check that we got back what we put in -
 *
 * Section N compares arrID[N] with the first character of piece sN.
 * arrPieces is 0-based, hence the |section - 1|.
 */
var arrPieces = [s1, s2, s3, s4, s5, s6, s7, s8, s9, s10];
for (var section = 1; section <= arrPieces.length; section++)
{
  status = inSection(section);
  actual = arrID[section];
  expect = arrPieces[section - 1].charAt(0);
  addThis();
}




//-----------------------------------------------------------------------------
test();
//-----------------------------------------------------------------------------



/*
 * Goal: recover the double-byte identifiers from f.toString()
 * by getting the very next character after each 'Z' token.
 *
 * The return value will be an array |arr| indexed such that
 * |arr[1]| is the 1st identifier, |arr[2]| the 2nd, and so on.
 *
 * Note, however, f.toString() is implementation-dependent.
 * For example, it may begin with '\nfunction' instead of 'function'.
 */
/*
 * Rhino uses a Unicode representation for f.toString(); whereas
 * SpiderMonkey uses an ASCII representation, putting escape sequences
 * for non-ASCII characters. For example, if a function is called f\u02B1,
 * then in Rhino the toString() method will present a 2-character Unicode
 * string for its name, whereas SpiderMonkey will present a 7-character
 * ASCII string for its name: the string literal 'f\u02B1'.
 *
 * So we force the lexer to condense the string before we use it.
 * This will give uniform results in Rhino and SpiderMonkey.
 *
 * getIdentifiers(f):
 *   f       - any value whose toString() yields source text in which each
 *             identifier of interest is preceded by a 'Z' token.
 *   returns - an array holding, for every piece of the condensed source
 *             split on 'Z', the first character of that piece.
 */
function getIdentifiers(f)
{
  var str = condenseStr(f.toString());
  var arr = str.split('Z');

  /*
   * The identifiers are the 1st char of each split substring
   * EXCEPT the first one, which is just ('\n' +) 'function '.
   *
   * Thus note the 1st identifier will be stored in |arr[1]|,
   * the 2nd one in |arr[2]|, etc., making the indexing easy -
   *
   * Use a declared numeric index: a bare |for (i in arr)| would create an
   * implicit global |i| and would also visit any enumerable properties
   * inherited by the array.
   */
  for (var i = 0; i < arr.length; i++)
  {
    arr[i] = arr[i].charAt(0);
  }
  return arr;
}


/*
 * This function is the opposite of functions like escape(), which take
 * Unicode characters and return escape sequences for them. Here, we force
 * the lexer to turn escape sequences back into single characters.
 *
 * Note we can't simply do |eval(str)|, since in practice |str| will be an
 * identifier somewhere in the program (e.g. a function name); thus |eval(str)|
 * would return the object that the identifier represents: not what we want.
 *
 * So we surround |str| lexicographically with quotes to force the lexer to
 * evaluate it as a string.
 */
/*
 * Have to strip out any linefeeds first, however -
 *
 * condenseStr(str):
 *   str     - source text, possibly containing escape sequences such as
 *             \u02b1 written out as ASCII, and possibly line breaks.
 *   returns - |str| with every carriage return and linefeed removed and
 *             then evaluated as the body of a single-quoted string literal,
 *             so escape sequences come back as single characters.
 *
 * NOTE: because |str| is wrapped in single quotes and eval()'d, it must not
 * itself contain an unescaped single quote; only pass trusted source text.
 */
function condenseStr(str)
{
  /*
   * You won't be able to do the next step if |str| has
   * any carriage returns or linefeeds in it. For example:
   *
   *  js> eval("'" + '\nHello' + "'");
   *  1: SyntaxError: unterminated string literal:
   *  1: '
   *  1: ^
   *
   * So replace them with the empty string -
   */
  str = str.replace(/[\r\n]/g, '');
  return eval("'" + str + "'");
}


/*
 * Record the current |status|, |actual| and |expect| globals as the next
 * entry of the parallel result arrays, then advance |UBound|.
 */
function addThis()
{
  statusitems[UBound] = status;
  actualvalues[UBound] = actual;
  expectedvalues[UBound] = expect;
  UBound++;
}


/*
 * Print the bug number and summary, then hand each of the |UBound| recorded
 * (expected, actual, status) triples to reportCompare(), in recording order.
 * enterFunc/exitFunc, printBugNumber, printStatus and reportCompare are not
 * defined in this file - presumably supplied by the test harness.
 */
function test()
{
  enterFunc('test');
  printBugNumber(BUGNUMBER);
  printStatus(summary);

  for (var i=0; i<UBound; i++)
  {
    reportCompare(expectedvalues[i], actualvalues[i], statusitems[i]);
  }

  exitFunc('test');
}