|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /* |
|
7 * |
|
8 * Date: 15 July 2002 |
|
9 * SUMMARY: Testing identifiers with double-byte names |
|
10 * See http://bugzilla.mozilla.org/show_bug.cgi?id=58274 |
|
11 * |
|
12 * Here is a sample of the problem: |
|
13 * |
|
14 * js> function f\u02B1 () {} |
|
15 * |
|
16 * js> f\u02B1.toSource(); |
|
17 * function f¦() {} |
|
18 * |
|
19 * js> f\u02B1.toSource().toSource(); |
|
20 * (new String("function f\xB1() {}")) |
|
21 * |
|
22 * |
|
23 * See how the high-byte information (the 02) has been lost? |
|
24 * The same thing was happening with the toString() method: |
|
25 * |
|
26 * js> f\u02B1.toString(); |
|
27 * |
|
28 * function f¦() { |
|
29 * } |
|
30 * |
|
31 * js> f\u02B1.toString().toSource(); |
|
32 * (new String("\nfunction f\xB1() {\n}\n")) |
|
33 * |
|
34 */ |
|
35 //----------------------------------------------------------------------------- |
|
// Identification of this test, printed by test()
var BUGNUMBER = 58274;
var summary = 'Testing identifiers with double-byte names';

// Values of the check currently being prepared; addThis() records them
var status = '';
var actual = '';
var expect = '';

// Recorded checks: addThis() stores one entry per array at index UBound
// and then increments UBound; test() reports entries 0 .. UBound-1
var UBound = 0;
var statusitems = [];
var actualvalues = [];
var expectedvalues = [];
|
45 |
|
46 |
|
47 /* |
|
48 * Define a function that uses double-byte identifiers in |
|
49 * "every possible way" |
|
50 * |
|
51 * Then recover each double-byte identifier via f.toString(). |
|
52 * To make this easier, put a 'Z' token before every one. |
|
53 * |
|
54 * Our eval string will be: |
|
55 * |
|
56 * sEval = "function Z\u02b1(Z\u02b2, b) { |
|
57 * try { Z\u02b3 : var Z\u02b4 = Z\u02b1; } |
|
58 * catch (Z\u02b5) { for (var Z\u02b6 in Z\u02b5) |
|
59 * {for (1; 1<0; Z\u02b7++) {new Array()[Z\u02b6] = 1;} };} }"; |
|
60 * |
|
61 * It will be helpful to build this string in stages: |
|
62 */ |
|
// Source fragments. Each one ends with a 'Z' token (except the last), and
// every fragment after s0 starts with the double-byte character that
// completes the identifier begun by the preceding 'Z'.
var s0 = 'function Z';
var s1 = '\u02b1(Z';
var s2 = '\u02b2, b) {try { Z';
var s3 = '\u02b3 : var Z';
var s4 = '\u02b4 = Z';
var s5 = '\u02b1; } catch (Z';
var s6 = '\u02b5) { for (var Z';
var s7 = '\u02b6 in Z';
var s8 = '\u02b5){for (1; 1<0; Z';
var s9 = '\u02b7++) {new Array()[Z';
var s10 = '\u02b6] = 1;} };} }';


/*
 * Join the fragments in order into one source string and eval() it
 * to create the function Z\u02b1
 */
var sEval = [s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10].join('');
eval(sEval);
|
81 |
|
82 |
|
83 /* |
|
84 * Recover all the double-byte identifiers via Z\u02b1.toString(). |
|
85 * We'll recover the 1st one as arrID[1], the 2nd one as arrID[2], |
|
86 * and so on ... |
|
87 */ |
|
var arrID = getIdentifiers(Z\u02b1);


/*
 * Now check that we got back what we put in: section N compares the
 * character recovered after the Nth 'Z' token (arrID[N]) with the first
 * character of fragment sN.
 *
 * s0 is passed at index 0 only so that index N lines up with sN;
 * the loop starts at 1 and never inspects it.
 */
(function (fragments)
{
  for (var n = 1; n < fragments.length; n++)
  {
    status = inSection(n);
    actual = arrID[n];
    expect = fragments[n].charAt(0);
    addThis();
  }
})([s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10]);




//-----------------------------------------------------------------------------
test();
|
149 //----------------------------------------------------------------------------- |
|
150 |
|
151 |
|
152 |
|
153 /* |
|
154 * Goal: recover the double-byte identifiers from f.toString() |
|
155 * by getting the very next character after each 'Z' token. |
|
156 * |
|
157 * The return value will be an array |arr| indexed such that |
|
158 * |arr[1]| is the 1st identifier, |arr[2]| the 2nd, and so on. |
|
159 * |
|
160 * Note, however, f.toString() is implementation-dependent. |
|
161 * For example, it may begin with '\nfunction' instead of 'function'. |
|
162 * |
|
163 * Rhino uses a Unicode representation for f.toString(); whereas |
|
164 * SpiderMonkey uses an ASCII representation, putting escape sequences |
|
165 * for non-ASCII characters. For example, if a function is called f\u02B1, |
|
166 * then in Rhino the toString() method will present a 2-character Unicode |
|
167 * string for its name, whereas SpiderMonkey will present a 7-character |
|
168 * ASCII string for its name: the string literal 'f\u02B1'. |
|
169 * |
|
170 * So we force the lexer to condense the string before we use it. |
|
171 * This will give uniform results in Rhino and SpiderMonkey. |
|
172 */ |
|
/*
 * Recover the character that follows each 'Z' token in f.toString().
 *
 * f: any value whose toString() yields the text to scan (a function here).
 *
 * Returns the array obtained by splitting the condensed text on 'Z', with
 * every element replaced by its own first character ('' for an empty piece).
 * Element 0 comes from the text before the first 'Z'; element N (N >= 1)
 * is the character right after the Nth 'Z'.
 */
function getIdentifiers(f)
{
  // condenseStr() strips CR/LF and turns escape sequences into characters
  var str = condenseStr(f.toString());
  var arr = str.split('Z');

  /*
   * Walk the pieces by numeric index with a declared loop variable.
   * An undeclared |for (i in arr)| creates an implicit global |i| (and is
   * a ReferenceError in strict code), and for-in would also visit any
   * enumerable property added to Array.prototype.
   */
  for (var i = 0; i < arr.length; i++)
    arr[i] = arr[i].charAt(0);

  return arr;
}
|
189 |
|
190 |
|
191 /* |
|
192 * This function is the opposite of functions like escape(), which take |
|
193 * Unicode characters and return escape sequences for them. Here, we force |
|
194 * the lexer to turn escape sequences back into single characters. |
|
195 * |
|
196 * Note we can't simply do |eval(str)|, since in practice |str| will be an |
|
197 * identifier somewhere in the program (e.g. a function name); thus |eval(str)| |
|
198 * would return the object that the identifier represents: not what we want. |
|
199 * |
|
200 * So we surround |str| lexically with quotes to force the lexer to |
|
201 * evaluate it as a string. Have to strip out any linefeeds first, however - |
|
202 */ |
|
/*
 * Return |str| with its carriage returns and linefeeds removed and with
 * any escape sequences it spells out (e.g. the six characters \u02B1)
 * replaced by the characters they denote.
 *
 * NOTE: the text is evaluated as the body of a single-quoted string
 * literal, so it must not contain an unescaped single quote or end in a
 * lone backslash. Only pass text this test controls.
 */
function condenseStr(str)
{
  /*
   * A quoted string literal cannot span lines. For example:
   *
   * js> eval("'" + '\nHello' + "'");
   * SyntaxError: unterminated string literal
   *
   * So delete every CR and LF before quoting.
   */
  str = str.replace(/[\r\n]/g, '');

  // Wrap in single quotes and let the lexer decode the escapes
  return eval(["'", str, "'"].join(''));
}
|
219 |
|
220 |
|
/*
 * Record the current check: copy the globals |status|, |actual| and
 * |expect| into slot UBound of statusitems, actualvalues and
 * expectedvalues respectively, then advance UBound to the next slot.
 */
function addThis()
{
  var slot = UBound;

  statusitems[slot] = status;
  actualvalues[slot] = actual;
  expectedvalues[slot] = expect;

  UBound++;
}
|
228 |
|
229 |
|
/*
 * Report every recorded check. Prints the bug number and summary, then
 * calls reportCompare(expected, actual, status) once for each of the
 * first UBound recorded entries, in the order they were recorded.
 * The work is bracketed by enterFunc('test') / exitFunc('test').
 */
function test()
{
  enterFunc('test');
  printBugNumber(BUGNUMBER);
  printStatus(summary);

  var idx = 0;
  while (idx < UBound)
  {
    reportCompare(expectedvalues[idx], actualvalues[idx], statusitems[idx]);
    idx++;
  }

  exitFunc('test');
}