|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
|
2 * vim: set ts=8 sts=4 et sw=4 tw=99: |
|
3 * This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 // JS lexical scanner. |
|
8 |
|
9 #include "frontend/TokenStream.h" |
|
10 |
|
11 #include "mozilla/PodOperations.h" |
|
12 |
|
13 #include <ctype.h> |
|
14 #include <stdarg.h> |
|
15 #include <stdio.h> |
|
16 #include <string.h> |
|
17 |
|
18 #include "jsatom.h" |
|
19 #include "jscntxt.h" |
|
20 #include "jsexn.h" |
|
21 #include "jsnum.h" |
|
22 #include "jsworkers.h" |
|
23 |
|
24 #include "frontend/BytecodeCompiler.h" |
|
25 #include "js/CharacterEncoding.h" |
|
26 #include "vm/Keywords.h" |
|
27 #include "vm/StringBuffer.h" |
|
28 |
|
29 using namespace js; |
|
30 using namespace js::frontend; |
|
31 using namespace js::unicode; |
|
32 |
|
33 using mozilla::Maybe; |
|
34 using mozilla::PodAssign; |
|
35 using mozilla::PodCopy; |
|
36 using mozilla::PodZero; |
|
37 |
|
38 struct KeywordInfo { |
|
39 const char *chars; // C string with keyword text |
|
40 TokenKind tokentype; |
|
41 JSVersion version; |
|
42 }; |
|
43 |
|
44 static const KeywordInfo keywords[] = { |
|
45 #define KEYWORD_INFO(keyword, name, type, version) \ |
|
46 {js_##keyword##_str, type, version}, |
|
47 FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO) |
|
48 #undef KEYWORD_INFO |
|
49 }; |
|
50 |
|
51 // Returns a KeywordInfo for the specified characters, or nullptr if the string |
|
52 // is not a keyword. |
|
53 static const KeywordInfo * |
|
54 FindKeyword(const jschar *s, size_t length) |
|
55 { |
|
56 JS_ASSERT(length != 0); |
|
57 |
|
58 size_t i; |
|
59 const KeywordInfo *kw; |
|
60 const char *chars; |
|
61 |
|
62 #define JSKW_LENGTH() length |
|
63 #define JSKW_AT(column) s[column] |
|
64 #define JSKW_GOT_MATCH(index) i = (index); goto got_match; |
|
65 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess; |
|
66 #define JSKW_NO_MATCH() goto no_match; |
|
67 #include "jsautokw.h" |
|
68 #undef JSKW_NO_MATCH |
|
69 #undef JSKW_TEST_GUESS |
|
70 #undef JSKW_GOT_MATCH |
|
71 #undef JSKW_AT |
|
72 #undef JSKW_LENGTH |
|
73 |
|
74 got_match: |
|
75 return &keywords[i]; |
|
76 |
|
77 test_guess: |
|
78 kw = &keywords[i]; |
|
79 chars = kw->chars; |
|
80 do { |
|
81 if (*s++ != (unsigned char)(*chars++)) |
|
82 goto no_match; |
|
83 } while (--length != 0); |
|
84 return kw; |
|
85 |
|
86 no_match: |
|
87 return nullptr; |
|
88 } |
|
89 |
|
90 bool |
|
91 frontend::IsIdentifier(JSLinearString *str) |
|
92 { |
|
93 const jschar *chars = str->chars(); |
|
94 size_t length = str->length(); |
|
95 |
|
96 if (length == 0) |
|
97 return false; |
|
98 jschar c = *chars; |
|
99 if (!IsIdentifierStart(c)) |
|
100 return false; |
|
101 const jschar *end = chars + length; |
|
102 while (++chars != end) { |
|
103 c = *chars; |
|
104 if (!IsIdentifierPart(c)) |
|
105 return false; |
|
106 } |
|
107 return true; |
|
108 } |
|
109 |
|
110 bool |
|
111 frontend::IsKeyword(JSLinearString *str) |
|
112 { |
|
113 return FindKeyword(str->chars(), str->length()) != nullptr; |
|
114 } |
|
115 |
|
116 TokenStream::SourceCoords::SourceCoords(ExclusiveContext *cx, uint32_t ln) |
|
117 : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0) |
|
118 { |
|
119 // This is actually necessary! Removing it causes compile errors on |
|
120 // GCC and clang. You could try declaring this: |
|
121 // |
|
122 // const uint32_t TokenStream::SourceCoords::MAX_PTR; |
|
123 // |
|
124 // which fixes the GCC/clang error, but causes bustage on Windows. Sigh. |
|
125 // |
|
126 uint32_t maxPtr = MAX_PTR; |
|
127 |
|
128 // The first line begins at buffer offset 0. MAX_PTR is the sentinel. The |
|
129 // appends cannot fail because |lineStartOffsets_| has statically-allocated |
|
130 // elements. |
|
131 JS_ASSERT(lineStartOffsets_.capacity() >= 2); |
|
132 (void)lineStartOffsets_.reserve(2); |
|
133 lineStartOffsets_.infallibleAppend(0); |
|
134 lineStartOffsets_.infallibleAppend(maxPtr); |
|
135 } |
|
136 |
|
137 MOZ_ALWAYS_INLINE void |
|
138 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset) |
|
139 { |
|
140 uint32_t lineIndex = lineNumToIndex(lineNum); |
|
141 uint32_t sentinelIndex = lineStartOffsets_.length() - 1; |
|
142 |
|
143 JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR); |
|
144 |
|
145 if (lineIndex == sentinelIndex) { |
|
146 // We haven't seen this newline before. Update lineStartOffsets_. |
|
147 // We ignore any failures due to OOM -- because we always have a |
|
148 // sentinel node, it'll just be like the newline wasn't present. I.e. |
|
149 // the line numbers will be wrong, but the code won't crash or anything |
|
150 // like that. |
|
151 lineStartOffsets_[lineIndex] = lineStartOffset; |
|
152 |
|
153 uint32_t maxPtr = MAX_PTR; |
|
154 (void)lineStartOffsets_.append(maxPtr); |
|
155 |
|
156 } else { |
|
157 // We have seen this newline before (and ungot it). Do nothing (other |
|
158 // than checking it hasn't mysteriously changed). |
|
159 JS_ASSERT(lineStartOffsets_[lineIndex] == lineStartOffset); |
|
160 } |
|
161 } |
|
162 |
|
163 MOZ_ALWAYS_INLINE bool |
|
164 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other) |
|
165 { |
|
166 JS_ASSERT(lineStartOffsets_.back() == MAX_PTR); |
|
167 JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR); |
|
168 |
|
169 if (lineStartOffsets_.length() >= other.lineStartOffsets_.length()) |
|
170 return true; |
|
171 |
|
172 uint32_t sentinelIndex = lineStartOffsets_.length() - 1; |
|
173 lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex]; |
|
174 |
|
175 for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) { |
|
176 if (!lineStartOffsets_.append(other.lineStartOffsets_[i])) |
|
177 return false; |
|
178 } |
|
179 return true; |
|
180 } |
|
181 |
|
182 MOZ_ALWAYS_INLINE uint32_t |
|
183 TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const |
|
184 { |
|
185 uint32_t iMin, iMax, iMid; |
|
186 |
|
187 if (lineStartOffsets_[lastLineIndex_] <= offset) { |
|
188 // If we reach here, offset is on a line the same as or higher than |
|
189 // last time. Check first for the +0, +1, +2 cases, because they |
|
190 // typically cover 85--98% of cases. |
|
191 if (offset < lineStartOffsets_[lastLineIndex_ + 1]) |
|
192 return lastLineIndex_; // lineIndex is same as last time |
|
193 |
|
194 // If we reach here, there must be at least one more entry (plus the |
|
195 // sentinel). Try it. |
|
196 lastLineIndex_++; |
|
197 if (offset < lineStartOffsets_[lastLineIndex_ + 1]) |
|
198 return lastLineIndex_; // lineIndex is one higher than last time |
|
199 |
|
200 // The same logic applies here. |
|
201 lastLineIndex_++; |
|
202 if (offset < lineStartOffsets_[lastLineIndex_ + 1]) { |
|
203 return lastLineIndex_; // lineIndex is two higher than last time |
|
204 } |
|
205 |
|
206 // No luck. Oh well, we have a better-than-default starting point for |
|
207 // the binary search. |
|
208 iMin = lastLineIndex_ + 1; |
|
209 JS_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel |
|
210 |
|
211 } else { |
|
212 iMin = 0; |
|
213 } |
|
214 |
|
215 // This is a binary search with deferred detection of equality, which was |
|
216 // marginally faster in this case than a standard binary search. |
|
217 // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we |
|
218 // want one before that. |
|
219 iMax = lineStartOffsets_.length() - 2; |
|
220 while (iMax > iMin) { |
|
221 iMid = iMin + (iMax - iMin) / 2; |
|
222 if (offset >= lineStartOffsets_[iMid + 1]) |
|
223 iMin = iMid + 1; // offset is above lineStartOffsets_[iMid] |
|
224 else |
|
225 iMax = iMid; // offset is below or within lineStartOffsets_[iMid] |
|
226 } |
|
227 JS_ASSERT(iMax == iMin); |
|
228 JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]); |
|
229 lastLineIndex_ = iMin; |
|
230 return iMin; |
|
231 } |
|
232 |
|
233 uint32_t |
|
234 TokenStream::SourceCoords::lineNum(uint32_t offset) const |
|
235 { |
|
236 uint32_t lineIndex = lineIndexOf(offset); |
|
237 return lineIndexToNum(lineIndex); |
|
238 } |
|
239 |
|
240 uint32_t |
|
241 TokenStream::SourceCoords::columnIndex(uint32_t offset) const |
|
242 { |
|
243 uint32_t lineIndex = lineIndexOf(offset); |
|
244 uint32_t lineStartOffset = lineStartOffsets_[lineIndex]; |
|
245 JS_ASSERT(offset >= lineStartOffset); |
|
246 return offset - lineStartOffset; |
|
247 } |
|
248 |
|
249 void |
|
250 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, |
|
251 uint32_t *columnIndex) const |
|
252 { |
|
253 uint32_t lineIndex = lineIndexOf(offset); |
|
254 *lineNum = lineIndexToNum(lineIndex); |
|
255 uint32_t lineStartOffset = lineStartOffsets_[lineIndex]; |
|
256 JS_ASSERT(offset >= lineStartOffset); |
|
257 *columnIndex = offset - lineStartOffset; |
|
258 } |
|
259 |
|
260 #ifdef _MSC_VER |
|
261 #pragma warning(push) |
|
262 #pragma warning(disable:4351) |
|
263 #endif |
|
264 |
|
265 // Initialize members that aren't initialized in |init|. |
|
266 TokenStream::TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options, |
|
267 const jschar *base, size_t length, StrictModeGetter *smg) |
|
268 : srcCoords(cx, options.lineno), |
|
269 options_(options), |
|
270 tokens(), |
|
271 cursor(), |
|
272 lookahead(), |
|
273 lineno(options.lineno), |
|
274 flags(), |
|
275 linebase(base - options.column), |
|
276 prevLinebase(nullptr), |
|
277 userbuf(cx, base - options.column, length + options.column), // See comment below |
|
278 filename(options.filename()), |
|
279 displayURL_(nullptr), |
|
280 sourceMapURL_(nullptr), |
|
281 tokenbuf(cx), |
|
282 cx(cx), |
|
283 originPrincipals(options.originPrincipals(cx)), |
|
284 strictModeGetter(smg) |
|
285 { |
|
286 // The caller must ensure that a reference is held on the supplied principals |
|
287 // throughout compilation. |
|
288 JS_ASSERT_IF(originPrincipals, originPrincipals->refcount > 0); |
|
289 |
|
290 // Column numbers are computed as offsets from the current line's base, so the |
|
291 // initial line's base must be included in the buffer. linebase and userbuf |
|
292 // were adjusted above, and if we are starting tokenization part way through |
|
293 // this line then adjust the next character. |
|
294 userbuf.setAddressOfNextRawChar(base); |
|
295 |
|
296 // Nb: the following tables could be static, but initializing them here is |
|
297 // much easier. Don't worry, the time to initialize them for each |
|
298 // TokenStream is trivial. See bug 639420. |
|
299 |
|
300 // See getChar() for an explanation of maybeEOL[]. |
|
301 memset(maybeEOL, 0, sizeof(maybeEOL)); |
|
302 maybeEOL[unsigned('\n')] = true; |
|
303 maybeEOL[unsigned('\r')] = true; |
|
304 maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true; |
|
305 maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true; |
|
306 |
|
307 // See getTokenInternal() for an explanation of maybeStrSpecial[]. |
|
308 memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial)); |
|
309 maybeStrSpecial[unsigned('"')] = true; |
|
310 maybeStrSpecial[unsigned('\'')] = true; |
|
311 maybeStrSpecial[unsigned('\\')] = true; |
|
312 maybeStrSpecial[unsigned('\n')] = true; |
|
313 maybeStrSpecial[unsigned('\r')] = true; |
|
314 maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true; |
|
315 maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true; |
|
316 maybeStrSpecial[unsigned(EOF & 0xff)] = true; |
|
317 |
|
318 // See Parser::assignExpr() for an explanation of isExprEnding[]. |
|
319 memset(isExprEnding, 0, sizeof(isExprEnding)); |
|
320 isExprEnding[TOK_COMMA] = 1; |
|
321 isExprEnding[TOK_SEMI] = 1; |
|
322 isExprEnding[TOK_COLON] = 1; |
|
323 isExprEnding[TOK_RP] = 1; |
|
324 isExprEnding[TOK_RB] = 1; |
|
325 isExprEnding[TOK_RC] = 1; |
|
326 } |
|
327 |
|
328 #ifdef _MSC_VER |
|
329 #pragma warning(pop) |
|
330 #endif |
|
331 |
|
332 TokenStream::~TokenStream() |
|
333 { |
|
334 js_free(displayURL_); |
|
335 js_free(sourceMapURL_); |
|
336 |
|
337 JS_ASSERT_IF(originPrincipals, originPrincipals->refcount); |
|
338 } |
|
339 |
|
340 // Use the fastest available getc. |
|
341 #if defined(HAVE_GETC_UNLOCKED) |
|
342 # define fast_getc getc_unlocked |
|
343 #elif defined(HAVE__GETC_NOLOCK) |
|
344 # define fast_getc _getc_nolock |
|
345 #else |
|
346 # define fast_getc getc |
|
347 #endif |
|
348 |
|
349 MOZ_ALWAYS_INLINE void |
|
350 TokenStream::updateLineInfoForEOL() |
|
351 { |
|
352 prevLinebase = linebase; |
|
353 linebase = userbuf.addressOfNextRawChar(); |
|
354 lineno++; |
|
355 srcCoords.add(lineno, linebase - userbuf.base()); |
|
356 } |
|
357 |
|
358 MOZ_ALWAYS_INLINE void |
|
359 TokenStream::updateFlagsForEOL() |
|
360 { |
|
361 flags.isDirtyLine = false; |
|
362 } |
|
363 |
|
364 // This gets the next char, normalizing all EOL sequences to '\n' as it goes. |
|
365 int32_t |
|
366 TokenStream::getChar() |
|
367 { |
|
368 int32_t c; |
|
369 if (MOZ_LIKELY(userbuf.hasRawChars())) { |
|
370 c = userbuf.getRawChar(); |
|
371 |
|
372 // Normalize the jschar if it was a newline. We need to detect any of |
|
373 // these four characters: '\n' (0x000a), '\r' (0x000d), |
|
374 // LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each |
|
375 // one in turn is slow, so we use a single probabilistic check, and if |
|
376 // that succeeds, test for them individually. |
|
377 // |
|
378 // We use the bottom 8 bits to index into a lookup table, succeeding |
|
379 // when d&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which |
|
380 // are by the far the most common) this gives false positives for '(' |
|
381 // (0x0028) and ')' (0x0029). We could avoid those by incorporating |
|
382 // the 13th bit of d into the lookup, but that requires extra shifting |
|
383 // and masking and isn't worthwhile. See TokenStream::TokenStream() |
|
384 // for the initialization of the relevant entries in the table. |
|
385 if (MOZ_UNLIKELY(maybeEOL[c & 0xff])) { |
|
386 if (c == '\n') |
|
387 goto eol; |
|
388 if (c == '\r') { |
|
389 // If it's a \r\n sequence: treat as a single EOL, skip over the \n. |
|
390 if (userbuf.hasRawChars()) |
|
391 userbuf.matchRawChar('\n'); |
|
392 goto eol; |
|
393 } |
|
394 if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) |
|
395 goto eol; |
|
396 } |
|
397 return c; |
|
398 } |
|
399 |
|
400 flags.isEOF = true; |
|
401 return EOF; |
|
402 |
|
403 eol: |
|
404 updateLineInfoForEOL(); |
|
405 return '\n'; |
|
406 } |
|
407 |
|
408 // This gets the next char. It does nothing special with EOL sequences, not |
|
409 // even updating the line counters. It can be used safely if (a) the |
|
410 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if |
|
411 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used |
|
412 // before it's ungotten. |
|
413 int32_t |
|
414 TokenStream::getCharIgnoreEOL() |
|
415 { |
|
416 if (MOZ_LIKELY(userbuf.hasRawChars())) |
|
417 return userbuf.getRawChar(); |
|
418 |
|
419 flags.isEOF = true; |
|
420 return EOF; |
|
421 } |
|
422 |
|
423 void |
|
424 TokenStream::ungetChar(int32_t c) |
|
425 { |
|
426 if (c == EOF) |
|
427 return; |
|
428 JS_ASSERT(!userbuf.atStart()); |
|
429 userbuf.ungetRawChar(); |
|
430 if (c == '\n') { |
|
431 #ifdef DEBUG |
|
432 int32_t c2 = userbuf.peekRawChar(); |
|
433 JS_ASSERT(TokenBuf::isRawEOLChar(c2)); |
|
434 #endif |
|
435 |
|
436 // If it's a \r\n sequence, also unget the \r. |
|
437 if (!userbuf.atStart()) |
|
438 userbuf.matchRawCharBackwards('\r'); |
|
439 |
|
440 JS_ASSERT(prevLinebase); // we should never get more than one EOL char |
|
441 linebase = prevLinebase; |
|
442 prevLinebase = nullptr; |
|
443 lineno--; |
|
444 } else { |
|
445 JS_ASSERT(userbuf.peekRawChar() == c); |
|
446 } |
|
447 } |
|
448 |
|
449 void |
|
450 TokenStream::ungetCharIgnoreEOL(int32_t c) |
|
451 { |
|
452 if (c == EOF) |
|
453 return; |
|
454 JS_ASSERT(!userbuf.atStart()); |
|
455 userbuf.ungetRawChar(); |
|
456 } |
|
457 |
|
458 // Return true iff |n| raw characters can be read from this without reading past |
|
459 // EOF or a newline, and copy those characters into |cp| if so. The characters |
|
460 // are not consumed: use skipChars(n) to do so after checking that the consumed |
|
461 // characters had appropriate values. |
|
462 bool |
|
463 TokenStream::peekChars(int n, jschar *cp) |
|
464 { |
|
465 int i, j; |
|
466 int32_t c; |
|
467 |
|
468 for (i = 0; i < n; i++) { |
|
469 c = getCharIgnoreEOL(); |
|
470 if (c == EOF) |
|
471 break; |
|
472 if (c == '\n') { |
|
473 ungetCharIgnoreEOL(c); |
|
474 break; |
|
475 } |
|
476 cp[i] = jschar(c); |
|
477 } |
|
478 for (j = i - 1; j >= 0; j--) |
|
479 ungetCharIgnoreEOL(cp[j]); |
|
480 return i == n; |
|
481 } |
|
482 |
|
483 const jschar * |
|
484 TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max) |
|
485 { |
|
486 JS_ASSERT(base_ <= p && p <= limit_); |
|
487 |
|
488 size_t n = 0; |
|
489 while (true) { |
|
490 if (p >= limit_) |
|
491 break; |
|
492 if (n >= max) |
|
493 break; |
|
494 if (TokenBuf::isRawEOLChar(*p++)) |
|
495 break; |
|
496 n++; |
|
497 } |
|
498 return p; |
|
499 } |
|
500 |
|
501 void |
|
502 TokenStream::advance(size_t position) |
|
503 { |
|
504 const jschar *end = userbuf.base() + position; |
|
505 while (userbuf.addressOfNextRawChar() < end) |
|
506 getChar(); |
|
507 |
|
508 Token *cur = &tokens[cursor]; |
|
509 cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base(); |
|
510 cur->type = TOK_ERROR; |
|
511 lookahead = 0; |
|
512 } |
|
513 |
|
514 void |
|
515 TokenStream::tell(Position *pos) |
|
516 { |
|
517 pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true); |
|
518 pos->flags = flags; |
|
519 pos->lineno = lineno; |
|
520 pos->linebase = linebase; |
|
521 pos->prevLinebase = prevLinebase; |
|
522 pos->lookahead = lookahead; |
|
523 pos->currentToken = currentToken(); |
|
524 for (unsigned i = 0; i < lookahead; i++) |
|
525 pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask]; |
|
526 } |
|
527 |
|
528 void |
|
529 TokenStream::seek(const Position &pos) |
|
530 { |
|
531 userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true); |
|
532 flags = pos.flags; |
|
533 lineno = pos.lineno; |
|
534 linebase = pos.linebase; |
|
535 prevLinebase = pos.prevLinebase; |
|
536 lookahead = pos.lookahead; |
|
537 |
|
538 tokens[cursor] = pos.currentToken; |
|
539 for (unsigned i = 0; i < lookahead; i++) |
|
540 tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i]; |
|
541 } |
|
542 |
|
543 bool |
|
544 TokenStream::seek(const Position &pos, const TokenStream &other) |
|
545 { |
|
546 if (!srcCoords.fill(other.srcCoords)) |
|
547 return false; |
|
548 seek(pos); |
|
549 return true; |
|
550 } |
|
551 |
|
552 bool |
|
553 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, |
|
554 va_list args) |
|
555 { |
|
556 // In strict mode code, this is an error, not merely a warning. |
|
557 unsigned flags = JSREPORT_STRICT; |
|
558 if (strictMode) |
|
559 flags |= JSREPORT_ERROR; |
|
560 else if (options().extraWarningsOption) |
|
561 flags |= JSREPORT_WARNING; |
|
562 else |
|
563 return true; |
|
564 |
|
565 return reportCompileErrorNumberVA(offset, flags, errorNumber, args); |
|
566 } |
|
567 |
|
568 void |
|
569 CompileError::throwError(JSContext *cx) |
|
570 { |
|
571 // If there's a runtime exception type associated with this error |
|
572 // number, set that as the pending exception. For errors occuring at |
|
573 // compile time, this is very likely to be a JSEXN_SYNTAXERR. |
|
574 // |
|
575 // If an exception is thrown but not caught, the JSREPORT_EXCEPTION |
|
576 // flag will be set in report.flags. Proper behavior for an error |
|
577 // reporter is to ignore a report with this flag for all but top-level |
|
578 // compilation errors. The exception will remain pending, and so long |
|
579 // as the non-top-level "load", "eval", or "compile" native function |
|
580 // returns false, the top-level reporter will eventually receive the |
|
581 // uncaught exception report. |
|
582 if (!js_ErrorToException(cx, message, &report, nullptr, nullptr)) |
|
583 CallErrorReporter(cx, message, &report); |
|
584 } |
|
585 |
|
586 CompileError::~CompileError() |
|
587 { |
|
588 js_free((void*)report.uclinebuf); |
|
589 js_free((void*)report.linebuf); |
|
590 js_free((void*)report.ucmessage); |
|
591 js_free(message); |
|
592 message = nullptr; |
|
593 |
|
594 if (report.messageArgs) { |
|
595 if (argumentsType == ArgumentsAreASCII) { |
|
596 unsigned i = 0; |
|
597 while (report.messageArgs[i]) |
|
598 js_free((void*)report.messageArgs[i++]); |
|
599 } |
|
600 js_free(report.messageArgs); |
|
601 } |
|
602 |
|
603 PodZero(&report); |
|
604 } |
|
605 |
|
606 bool |
|
607 TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, |
|
608 va_list args) |
|
609 { |
|
610 bool warning = JSREPORT_IS_WARNING(flags); |
|
611 |
|
612 if (warning && options().werrorOption) { |
|
613 flags &= ~JSREPORT_WARNING; |
|
614 warning = false; |
|
615 } |
|
616 |
|
617 // On the main thread, report the error immediately. When compiling off |
|
618 // thread, save the error so that the main thread can report it later. |
|
619 CompileError tempErr; |
|
620 CompileError &err = cx->isJSContext() ? tempErr : cx->addPendingCompileError(); |
|
621 |
|
622 err.report.flags = flags; |
|
623 err.report.errorNumber = errorNumber; |
|
624 err.report.filename = filename; |
|
625 err.report.originPrincipals = originPrincipals; |
|
626 if (offset == NoOffset) { |
|
627 err.report.lineno = 0; |
|
628 err.report.column = 0; |
|
629 } else { |
|
630 err.report.lineno = srcCoords.lineNum(offset); |
|
631 err.report.column = srcCoords.columnIndex(offset); |
|
632 } |
|
633 |
|
634 err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII; |
|
635 |
|
636 if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, nullptr, errorNumber, &err.message, |
|
637 &err.report, err.argumentsType, args)) |
|
638 { |
|
639 return false; |
|
640 } |
|
641 |
|
642 // Given a token, T, that we want to complain about: if T's (starting) |
|
643 // lineno doesn't match TokenStream's lineno, that means we've scanned past |
|
644 // the line that T starts on, which makes it hard to print some or all of |
|
645 // T's (starting) line for context. |
|
646 // |
|
647 // So we don't even try, leaving report.linebuf and friends zeroed. This |
|
648 // means that any error involving a multi-line token (e.g. an unterminated |
|
649 // multi-line string literal) won't have a context printed. |
|
650 if (offset != NoOffset && err.report.lineno == lineno) { |
|
651 const jschar *tokenStart = userbuf.base() + offset; |
|
652 |
|
653 // We show only a portion (a "window") of the line around the erroneous |
|
654 // token -- the first char in the token, plus |windowRadius| chars |
|
655 // before it and |windowRadius - 1| chars after it. This is because |
|
656 // lines can be very long and printing the whole line is (a) not that |
|
657 // helpful, and (b) can waste a lot of memory. See bug 634444. |
|
658 static const size_t windowRadius = 60; |
|
659 |
|
660 // Truncate at the front if necessary. |
|
661 const jschar *windowBase = (linebase + windowRadius < tokenStart) |
|
662 ? tokenStart - windowRadius |
|
663 : linebase; |
|
664 uint32_t windowOffset = tokenStart - windowBase; |
|
665 |
|
666 // Find EOL, or truncate at the back if necessary. |
|
667 const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius); |
|
668 size_t windowLength = windowLimit - windowBase; |
|
669 JS_ASSERT(windowLength <= windowRadius * 2); |
|
670 |
|
671 // Create the windowed strings. |
|
672 StringBuffer windowBuf(cx); |
|
673 if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0)) |
|
674 return false; |
|
675 |
|
676 // Unicode and char versions of the window into the offending source |
|
677 // line, without final \n. |
|
678 err.report.uclinebuf = windowBuf.extractWellSized(); |
|
679 if (!err.report.uclinebuf) |
|
680 return false; |
|
681 TwoByteChars tbchars(err.report.uclinebuf, windowLength); |
|
682 err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str(); |
|
683 if (!err.report.linebuf) |
|
684 return false; |
|
685 |
|
686 err.report.tokenptr = err.report.linebuf + windowOffset; |
|
687 err.report.uctokenptr = err.report.uclinebuf + windowOffset; |
|
688 } |
|
689 |
|
690 if (cx->isJSContext()) |
|
691 err.throwError(cx->asJSContext()); |
|
692 |
|
693 return warning; |
|
694 } |
|
695 |
|
696 bool |
|
697 TokenStream::reportStrictModeError(unsigned errorNumber, ...) |
|
698 { |
|
699 va_list args; |
|
700 va_start(args, errorNumber); |
|
701 bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(), |
|
702 errorNumber, args); |
|
703 va_end(args); |
|
704 return result; |
|
705 } |
|
706 |
|
707 bool |
|
708 TokenStream::reportError(unsigned errorNumber, ...) |
|
709 { |
|
710 va_list args; |
|
711 va_start(args, errorNumber); |
|
712 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber, |
|
713 args); |
|
714 va_end(args); |
|
715 return result; |
|
716 } |
|
717 |
|
718 bool |
|
719 TokenStream::reportWarning(unsigned errorNumber, ...) |
|
720 { |
|
721 va_list args; |
|
722 va_start(args, errorNumber); |
|
723 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING, |
|
724 errorNumber, args); |
|
725 va_end(args); |
|
726 return result; |
|
727 } |
|
728 |
|
729 bool |
|
730 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args) |
|
731 { |
|
732 if (!options().extraWarningsOption) |
|
733 return true; |
|
734 |
|
735 return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args); |
|
736 } |
|
737 |
|
738 void |
|
739 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...) |
|
740 { |
|
741 va_list args; |
|
742 va_start(args, errorNumber); |
|
743 reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, args); |
|
744 va_end(args); |
|
745 } |
|
746 |
|
747 // We have encountered a '\': check for a Unicode escape sequence after it. |
|
748 // Return 'true' and the character code value (by value) if we found a |
|
749 // Unicode escape sequence. Otherwise, return 'false'. In both cases, do not |
|
750 // advance along the buffer. |
|
751 bool |
|
752 TokenStream::peekUnicodeEscape(int *result) |
|
753 { |
|
754 jschar cp[5]; |
|
755 |
|
756 if (peekChars(5, cp) && cp[0] == 'u' && |
|
757 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && |
|
758 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) |
|
759 { |
|
760 *result = (((((JS7_UNHEX(cp[1]) << 4) |
|
761 + JS7_UNHEX(cp[2])) << 4) |
|
762 + JS7_UNHEX(cp[3])) << 4) |
|
763 + JS7_UNHEX(cp[4]); |
|
764 return true; |
|
765 } |
|
766 return false; |
|
767 } |
|
768 |
|
769 bool |
|
770 TokenStream::matchUnicodeEscapeIdStart(int32_t *cp) |
|
771 { |
|
772 if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) { |
|
773 skipChars(5); |
|
774 return true; |
|
775 } |
|
776 return false; |
|
777 } |
|
778 |
|
779 bool |
|
780 TokenStream::matchUnicodeEscapeIdent(int32_t *cp) |
|
781 { |
|
782 if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) { |
|
783 skipChars(5); |
|
784 return true; |
|
785 } |
|
786 return false; |
|
787 } |
|
788 |
|
789 // Helper function which returns true if the first length(q) characters in p are |
|
790 // the same as the characters in q. |
|
791 static bool |
|
792 CharsMatch(const jschar *p, const char *q) { |
|
793 while (*q) { |
|
794 if (*p++ != *q++) |
|
795 return false; |
|
796 } |
|
797 return true; |
|
798 } |
|
799 |
|
800 bool |
|
801 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated) |
|
802 { |
|
803 // Match directive comments used in debugging, such as "//# sourceURL" and |
|
804 // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated. |
|
805 // |
|
806 // To avoid a crashing bug in IE, several JavaScript transpilers wrap single |
|
807 // line comments containing a source mapping URL inside a multiline |
|
808 // comment. To avoid potentially expensive lookahead and backtracking, we |
|
809 // only check for this case if we encounter a '#' character. |
|
810 |
|
811 if (!getDisplayURL(isMultiline, shouldWarnDeprecated)) |
|
812 return false; |
|
813 if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated)) |
|
814 return false; |
|
815 |
|
816 return true; |
|
817 } |
|
818 |
|
819 bool |
|
820 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated, |
|
821 const char *directive, int directiveLength, |
|
822 const char *errorMsgPragma, jschar **destination) { |
|
823 JS_ASSERT(directiveLength <= 18); |
|
824 jschar peeked[18]; |
|
825 int32_t c; |
|
826 |
|
827 if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) { |
|
828 if (shouldWarnDeprecated && |
|
829 !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) |
|
830 return false; |
|
831 |
|
832 skipChars(directiveLength); |
|
833 tokenbuf.clear(); |
|
834 |
|
835 while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) { |
|
836 getChar(); |
|
837 // Debugging directives can occur in both single- and multi-line |
|
838 // comments. If we're currently inside a multi-line comment, we also |
|
839 // need to recognize multi-line comment terminators. |
|
840 if (isMultiline && c == '*' && peekChar() == '/') { |
|
841 ungetChar('*'); |
|
842 break; |
|
843 } |
|
844 tokenbuf.append(c); |
|
845 } |
|
846 |
|
847 if (tokenbuf.empty()) |
|
848 // The directive's URL was missing, but this is not quite an |
|
849 // exception that we should stop and drop everything for. |
|
850 return true; |
|
851 |
|
852 size_t length = tokenbuf.length(); |
|
853 |
|
854 js_free(*destination); |
|
855 *destination = cx->pod_malloc<jschar>(length + 1); |
|
856 if (!*destination) |
|
857 return false; |
|
858 |
|
859 PodCopy(*destination, tokenbuf.begin(), length); |
|
860 (*destination)[length] = '\0'; |
|
861 } |
|
862 |
|
863 return true; |
|
864 } |
|
865 |
|
866 bool |
|
867 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated) |
|
868 { |
|
869 // Match comments of the form "//# sourceURL=<url>" or |
|
870 // "/\* //# sourceURL=<url> *\/" |
|
871 // |
|
872 // Note that while these are labeled "sourceURL" in the source text, |
|
873 // internally we refer to it as a "displayURL" to distinguish what the |
|
874 // developer would like to refer to the source as from the source's actual |
|
875 // URL. |
|
876 |
|
877 return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11, |
|
878 "sourceURL", &displayURL_); |
|
879 } |
|
880 |
|
881 bool |
|
882 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated) |
|
883 { |
|
884 // Match comments of the form "//# sourceMappingURL=<url>" or |
|
885 // "/\* //# sourceMappingURL=<url> *\/" |
|
886 |
|
887 return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18, |
|
888 "sourceMappingURL", &sourceMapURL_); |
|
889 } |
|
890 |
|
891 MOZ_ALWAYS_INLINE Token * |
|
892 TokenStream::newToken(ptrdiff_t adjust) |
|
893 { |
|
894 cursor = (cursor + 1) & ntokensMask; |
|
895 Token *tp = &tokens[cursor]; |
|
896 tp->pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base(); |
|
897 |
|
898 // NOTE: tp->pos.end is not set until the very end of getTokenInternal(). |
|
899 MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end)); |
|
900 |
|
901 return tp; |
|
902 } |
|
903 |
|
904 MOZ_ALWAYS_INLINE JSAtom * |
|
905 TokenStream::atomize(ExclusiveContext *cx, CharBuffer &cb) |
|
906 { |
|
907 return AtomizeChars(cx, cb.begin(), cb.length()); |
|
908 } |
|
909 |
|
#ifdef DEBUG
// Debug-only consistency check for a finished token: its kind must be a real
// token kind and its source range must not be inverted.
static bool
IsTokenSane(Token *tp)
{
    // Nb: TOK_EOL should never be used in an actual Token;  it should only be
    // returned as a TokenKind from peekTokenSameLine().
    const bool kindInRange = tp->type >= TOK_ERROR && tp->type < TOK_LIMIT;
    const bool kindIsStorable = tp->type != TOK_EOL;
    const bool rangeIsOrdered = tp->pos.end >= tp->pos.begin;

    return kindInRange && kindIsStorable && rangeIsOrdered;
}
#endif
|
925 |
|
926 bool |
|
927 TokenStream::putIdentInTokenbuf(const jschar *identStart) |
|
928 { |
|
929 int32_t c, qc; |
|
930 const jschar *tmp = userbuf.addressOfNextRawChar(); |
|
931 userbuf.setAddressOfNextRawChar(identStart); |
|
932 |
|
933 tokenbuf.clear(); |
|
934 for (;;) { |
|
935 c = getCharIgnoreEOL(); |
|
936 if (!IsIdentifierPart(c)) { |
|
937 if (c != '\\' || !matchUnicodeEscapeIdent(&qc)) |
|
938 break; |
|
939 c = qc; |
|
940 } |
|
941 if (!tokenbuf.append(c)) { |
|
942 userbuf.setAddressOfNextRawChar(tmp); |
|
943 return false; |
|
944 } |
|
945 } |
|
946 userbuf.setAddressOfNextRawChar(tmp); |
|
947 return true; |
|
948 } |
|
949 |
|
950 bool |
|
951 TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp) |
|
952 { |
|
953 const KeywordInfo *kw = FindKeyword(s, length); |
|
954 if (!kw) |
|
955 return true; |
|
956 |
|
957 if (kw->tokentype == TOK_RESERVED) |
|
958 return reportError(JSMSG_RESERVED_ID, kw->chars); |
|
959 |
|
960 if (kw->tokentype != TOK_STRICT_RESERVED) { |
|
961 if (kw->version <= versionNumber()) { |
|
962 // Working keyword. |
|
963 if (ttp) { |
|
964 *ttp = kw->tokentype; |
|
965 return true; |
|
966 } |
|
967 return reportError(JSMSG_RESERVED_ID, kw->chars); |
|
968 } |
|
969 |
|
970 // The keyword is not in this version. Treat it as an identifier, unless |
|
971 // it is let which we treat as TOK_STRICT_RESERVED by falling through to |
|
972 // the code below (ES5 forbids it in strict mode). |
|
973 if (kw->tokentype != TOK_LET) |
|
974 return true; |
|
975 } |
|
976 |
|
977 // Strict reserved word. |
|
978 return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars); |
|
979 } |
|
980 |
|
// Classification of a token's first char, used by getTokenInternal() to pick
// a scanning path.  The firstCharKinds[] table maps each char in the range
// 0..127 to one of these values.
enum FirstCharKind {
    // A jschar has the 'OneChar' kind if it, by itself, constitutes a valid
    // token that cannot also be a prefix of a longer token.  E.g. ';' has the
    // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
    // that begin with '+'.
    //
    // The few token kinds satisfying these properties cover roughly 35--45%
    // of the tokens seen in practice.
    //
    // We represent the 'OneChar' kind with any positive value less than
    // TOK_LIMIT.  This representation lets us associate each one-char token
    // jschar with a TokenKind and thus avoid a subsequent jschar-to-TokenKind
    // conversion.
    OneChar_Min = 0,
    OneChar_Max = TOK_LIMIT - 1,

    // The remaining kinds start at TOK_LIMIT so that they can never be
    // confused with a TokenKind value stored in the same table.
    Space = TOK_LIMIT,  // non-EOL whitespace: '\t', '\v', '\f', ' '
    Ident,              // identifier start: '$', '_', 'A'..'Z', 'a'..'z'
    Dec,                // non-zero decimal digit: '1'..'9'
    String,             // string delimiter: '"', '\''
    EOL,                // line terminator: '\n', '\r'
    BasePrefix,         // '0', which may introduce a radix prefix
    Other,              // everything else

    // Largest enumerator; checked against the element size of firstCharKinds[].
    LastCharKind = Other
};
|
1007 |
|
// Lookup table giving the FirstCharKind of every char in the range 0..127.
// getTokenInternal() only indexes it after chars >= 128 have been handled.
//
// OneChar: 40,  41,  44,  58,  59,  63,  91,  93,  123, 125, 126:
//          '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
// Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
// String:  34, 39: '"', '\''
// Dec:     49..57: '1'..'9'
// BasePrefix: 48: '0'
// Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
// EOL:     10, 13: '\n', '\r'
// Other:   every remaining char, including '.' (46), '=' (61) and '+' (43),
//          which have no dedicated kind.
//
// The T_* and _______ macros are short aliases that exist only to keep the
// table columns aligned; they are undefined again right after the table.
#define T_COMMA     TOK_COMMA
#define T_COLON     TOK_COLON
#define T_BITNOT    TOK_BITNOT
#define _______     Other
static const uint8_t firstCharKinds[] = {
/*         0        1        2        3        4        5        6        7        8        9    */
/*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
/*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
/*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
/*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
/*  40+ */  TOK_LP,  TOK_RP, _______, _______, T_COMMA,_______, _______, _______,BasePrefix,  Dec,
/*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec, T_COLON,TOK_SEMI,
/*  60+ */ _______, _______, _______,TOK_HOOK, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/*  90+ */   Ident,  TOK_LB, _______,  TOK_RB, _______,   Ident, _______,   Ident,   Ident,   Ident,
/* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
/* 120+ */   Ident,   Ident,   Ident,  TOK_LC, _______,  TOK_RC,T_BITNOT, _______
};
#undef T_COMMA
#undef T_COLON
#undef T_BITNOT
#undef _______

// Every FirstCharKind value must fit in one table element.
static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
              "Elements of firstCharKinds[] are too small");
|
1047 |
|
// Scans the next token from userbuf.
//
// Whitespace, line terminators and comments are skipped by jumping back to
// |retry|.  Every other path reserves a slot in the token ring buffer with
// newToken(), fills it in, and leaves through one of two exits:
//
//   out:   the token is complete; its end offset is recorded and its kind is
//          returned.
//   error: the token is turned into TOK_ERROR, onError() is called, and
//          TOK_ERROR is returned.
//
// |modifier| affects scanning in two places: KeywordIsName disables the
// keyword check for identifiers, and Operand makes a '/' start a regular
// expression literal rather than a division operator.
TokenKind
TokenStream::getTokenInternal(Modifier modifier)
{
    int c, qc;
    Token *tp;
    FirstCharKind c1kind;
    const jschar *numStart;
    bool hasExp;
    DecimalPoint decimalPoint;
    const jschar *identStart;
    bool hadUnicodeEscape;

  retry:
    // No chars left: produce the EOF token.
    if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
        tp = newToken(0);
        tp->type = TOK_EOF;
        flags.isEOF = true;
        goto out;
    }

    c = userbuf.getRawChar();
    JS_ASSERT(c != EOF);

    // Chars not in the range 0..127 are rare.  Getting them out of the way
    // early allows subsequent checking to be faster.
    if (MOZ_UNLIKELY(c >= 128)) {
        if (IsSpaceOrBOM2(c)) {
            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
                updateLineInfoForEOL();
                updateFlagsForEOL();
            }

            goto retry;
        }

        tp = newToken(-1);

        // '$' and '_' don't pass IsLetter, but they're < 128 so never appear here.
        JS_STATIC_ASSERT('$' < 128 && '_' < 128);
        if (IsLetter(c)) {
            identStart = userbuf.addressOfNextRawChar() - 1;
            hadUnicodeEscape = false;
            goto identifier;
        }

        goto badchar;
    }

    // Get the token kind, based on the first char.  The ordering of c1kind
    // comparison is based on the frequency of tokens in real code -- Parsemark
    // (which represents typical JS code on the web) and the Unreal demo (which
    // represents asm.js code).
    //
    //                  Parsemark   Unreal
    //  OneChar         32.9%       39.7%
    //  Space           25.0%        0.6%
    //  Ident           19.2%       36.4%
    //  Dec              7.2%        5.1%
    //  String           7.9%        0.0%
    //  EOL              1.7%        0.0%
    //  BasePrefix       0.4%        4.9%
    //  Other            5.7%       13.3%
    //
    // The ordering is based mostly on Parsemark frequencies, with Unreal
    // frequencies used to break close categories (e.g. |Dec| and |String|).
    // |Other| is biggish, but no other token kind is common enough for it to
    // be worth adding extra values to FirstCharKind.
    //
    c1kind = FirstCharKind(firstCharKinds[c]);

    // Look for an unambiguous single-char token.
    //
    if (c1kind <= OneChar_Max) {
        tp = newToken(-1);
        tp->type = TokenKind(c1kind);
        goto out;
    }

    // Skip over non-EOL whitespace chars.
    //
    if (c1kind == Space)
        goto retry;

    // Look for an identifier.
    //
    if (c1kind == Ident) {
        tp = newToken(-1);
        identStart = userbuf.addressOfNextRawChar() - 1;
        hadUnicodeEscape = false;

      identifier:
        // Also entered from the non-ASCII letter path above and from the
        // '\\' case below, with tp, identStart and hadUnicodeEscape set up.
        for (;;) {
            c = getCharIgnoreEOL();
            if (c == EOF)
                break;
            if (!IsIdentifierPart(c)) {
                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
                    break;
                hadUnicodeEscape = true;
            }
        }
        ungetCharIgnoreEOL(c);

        // Identifiers containing no Unicode escapes can be processed directly
        // from userbuf.  The rest must use the escapes converted via tokenbuf
        // before atomizing.
        const jschar *chars;
        size_t length;
        if (hadUnicodeEscape) {
            if (!putIdentInTokenbuf(identStart))
                goto error;

            chars = tokenbuf.begin();
            length = tokenbuf.length();
        } else {
            chars = identStart;
            length = userbuf.addressOfNextRawChar() - identStart;
        }

        // Check for keywords unless the parser told us not to.
        if (modifier != KeywordIsName) {
            tp->type = TOK_NAME;
            if (!checkForKeyword(chars, length, &tp->type))
                goto error;
            // checkForKeyword() overwrote the kind: this is a keyword token.
            if (tp->type != TOK_NAME)
                goto out;
        }

        JSAtom *atom = AtomizeChars(cx, chars, length);
        if (!atom)
            goto error;
        tp->type = TOK_NAME;
        tp->setName(atom->asPropertyName());
        goto out;
    }

    // Look for a decimal number.
    //
    if (c1kind == Dec) {
        tp = newToken(-1);
        numStart = userbuf.addressOfNextRawChar() - 1;

      decimal:
        // Also entered from the BasePrefix path below, with tp and numStart
        // already set up.
        decimalPoint = NoDecimal;
        hasExp = false;
        while (JS7_ISDEC(c))
            c = getCharIgnoreEOL();

        if (c == '.') {
            decimalPoint = HasDecimal;
          decimal_dot:
            // Also entered from the '.' case below for numbers such as ".5".
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        if (c == 'e' || c == 'E') {
            hasExp = true;
            c = getCharIgnoreEOL();
            if (c == '+' || c == '-')
                c = getCharIgnoreEOL();
            if (!JS7_ISDEC(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_EXPONENT);
                goto error;
            }
            do {
                c = getCharIgnoreEOL();
            } while (JS7_ISDEC(c));
        }
        ungetCharIgnoreEOL(c);

        // An identifier may not start directly after a numeric literal.
        if (c != EOF && IsIdentifierStart(c)) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        // Unlike identifiers and strings, numbers cannot contain escaped
        // chars, so we don't need to use tokenbuf.  Instead we can just
        // convert the jschars in userbuf directly to the numeric value.
        double dval;
        if (!((decimalPoint == HasDecimal) || hasExp)) {
            if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
                goto error;
        } else {
            const jschar *dummy;
            if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
                goto error;
        }
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, decimalPoint);
        goto out;
    }

    // Look for a string.
    //
    if (c1kind == String) {
        tp = newToken(-1);
        // Remember which quote char opened the string.
        qc = c;
        tokenbuf.clear();
        while (true) {
            // We need to detect any of these chars:  " or ', \n (or its
            // equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
            // similar to maybeEOL[], see above.  Because we detect EOL
            // sequences here and put them back immediately, we can use
            // getCharIgnoreEOL().
            c = getCharIgnoreEOL();
            if (maybeStrSpecial[c & 0xff]) {
                if (c == qc)
                    break;
                if (c == '\\') {
                    switch (c = getChar()) {
                      case 'b': c = '\b'; break;
                      case 'f': c = '\f'; break;
                      case 'n': c = '\n'; break;
                      case 'r': c = '\r'; break;
                      case 't': c = '\t'; break;
                      case 'v': c = '\v'; break;

                      default:
                        if ('0' <= c && c < '8') {
                            // Octal escape of up to three digits.
                            int32_t val = JS7_UNDEC(c);

                            c = peekChar();
                            // Strict mode code allows only \0, then a non-digit.
                            if (val != 0 || JS7_ISDEC(c)) {
                                if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
                                    goto error;
                                flags.sawOctalEscape = true;
                            }
                            if ('0' <= c && c < '8') {
                                val = 8 * val + JS7_UNDEC(c);
                                getChar();
                                c = peekChar();
                                if ('0' <= c && c < '8') {
                                    // Only take a third digit if the value
                                    // stays within 0377.
                                    int32_t save = val;
                                    val = 8 * val + JS7_UNDEC(c);
                                    if (val <= 0377)
                                        getChar();
                                    else
                                        val = save;
                                }
                            }

                            c = jschar(val);
                        } else if (c == 'u') {
                            // \uXXXX: exactly four hex digits are required.
                            jschar cp[4];
                            if (peekChars(4, cp) &&
                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
                                c = (((((JS7_UNHEX(cp[0]) << 4)
                                        + JS7_UNHEX(cp[1])) << 4)
                                      + JS7_UNHEX(cp[2])) << 4)
                                    + JS7_UNHEX(cp[3]);
                                skipChars(4);
                            } else {
                                reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
                                goto error;
                            }
                        } else if (c == 'x') {
                            // \xXX: exactly two hex digits are required.
                            jschar cp[2];
                            if (peekChars(2, cp) &&
                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
                                c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
                                skipChars(2);
                            } else {
                                reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
                                goto error;
                            }
                        } else if (c == '\n') {
                            // ES5 7.8.4: an escaped line terminator represents
                            // no character.
                            continue;
                        }
                        // Any other escaped char stands for itself.
                        break;
                    }
                } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
                    ungetCharIgnoreEOL(c);
                    reportError(JSMSG_UNTERMINATED_STRING);
                    goto error;
                }
            }
            if (!tokenbuf.append(c))
                goto error;
        }
        JSAtom *atom = atomize(cx, tokenbuf);
        if (!atom)
            goto error;
        tp->type = TOK_STRING;
        tp->setAtom(atom);
        goto out;
    }

    // Skip over EOL chars, updating line state along the way.
    //
    if (c1kind == EOL) {
        // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
        if (c == '\r' && userbuf.hasRawChars())
            userbuf.matchRawChar('\n');
        updateLineInfoForEOL();
        updateFlagsForEOL();
        goto retry;
    }

    // Look for a hexadecimal, octal, or binary number.
    //
    if (c1kind == BasePrefix) {
        tp = newToken(-1);
        int radix;
        c = getCharIgnoreEOL();
        if (c == 'x' || c == 'X') {
            radix = 16;
            c = getCharIgnoreEOL();
            if (!JS7_ISHEX(c)) {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_HEXDIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0x'
            while (JS7_ISHEX(c))
                c = getCharIgnoreEOL();
        } else if (c == 'b' || c == 'B') {
            radix = 2;
            c = getCharIgnoreEOL();
            if (c != '0' && c != '1') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_BINARY_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0b'
            while (c == '0' || c == '1')
                c = getCharIgnoreEOL();
        } else if (c == 'o' || c == 'O') {
            radix = 8;
            c = getCharIgnoreEOL();
            if (c < '0' || c > '7') {
                ungetCharIgnoreEOL(c);
                reportError(JSMSG_MISSING_OCTAL_DIGITS);
                goto error;
            }
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0o'
            while ('0' <= c && c <= '7')
                c = getCharIgnoreEOL();
        } else if (JS7_ISDEC(c)) {
            // Legacy octal literal: '0' followed directly by digits.
            radix = 8;
            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0'
            while (JS7_ISDEC(c)) {
                // Octal integer literals are not permitted in strict mode code.
                if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
                    goto error;

                // Outside strict mode, we permit 08 and 09 as decimal numbers,
                // which makes our behaviour a superset of the ECMA numeric
                // grammar. We might not always be so permissive, so we warn
                // about it.
                if (c >= '8') {
                    if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
                        goto error;
                    }
                    goto decimal;   // use the decimal scanner for the rest of the number
                }
                c = getCharIgnoreEOL();
            }
        } else {
            // '0' not followed by a radix prefix ('x', 'b' or 'o', in either
            // case) or a digit; scan as a decimal number.
            numStart = userbuf.addressOfNextRawChar() - 1;
            goto decimal;
        }
        ungetCharIgnoreEOL(c);

        // An identifier may not start directly after a numeric literal.
        if (c != EOF && IsIdentifierStart(c)) {
            reportError(JSMSG_IDSTART_AFTER_NUMBER);
            goto error;
        }

        double dval;
        const jschar *dummy;
        if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
            goto error;
        tp->type = TOK_NUMBER;
        tp->setNumber(dval, NoDecimal);
        goto out;
    }

    // This handles everything else.
    //
    JS_ASSERT(c1kind == Other);
    tp = newToken(-1);
    switch (c) {
      case '.':
        c = getCharIgnoreEOL();
        if (JS7_ISDEC(c)) {
            // A number with no integer part; numStart points at the '.'.
            numStart = userbuf.addressOfNextRawChar() - 2;
            decimalPoint = HasDecimal;
            hasExp = false;
            goto decimal_dot;
        }
        if (c == '.') {
            if (matchChar('.')) {
                tp->type = TOK_TRIPLEDOT;
                goto out;
            }
        }
        ungetCharIgnoreEOL(c);
        tp->type = TOK_DOT;
        goto out;

      case '=':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
        else if (matchChar('>'))
            tp->type = TOK_ARROW;
        else
            tp->type = TOK_ASSIGN;
        goto out;

      case '+':
        if (matchChar('+'))
            tp->type = TOK_INC;
        else
            tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
        goto out;

      case '\\':
        // An identifier may begin with a Unicode escape.
        hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
        if (hadUnicodeEscape) {
            // Back up over the six chars of the "\uXXXX" escape.
            identStart = userbuf.addressOfNextRawChar() - 6;
            goto identifier;
        }
        goto badchar;

      case '|':
        if (matchChar('|'))
            tp->type = TOK_OR;
        else
            tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
        goto out;

      case '^':
        tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
        goto out;

      case '&':
        if (matchChar('&'))
            tp->type = TOK_AND;
        else
            tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
        goto out;

      case '!':
        if (matchChar('='))
            tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
        else
            tp->type = TOK_NOT;
        goto out;

      case '<':
        // NB: treat HTML begin-comment as comment-till-end-of-line.
        if (matchChar('!')) {
            if (matchChar('-')) {
                if (matchChar('-'))
                    goto skipline;
                ungetChar('-');
            }
            ungetChar('!');
        }
        if (matchChar('<')) {
            tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
        } else {
            tp->type = matchChar('=') ? TOK_LE : TOK_LT;
        }
        goto out;

      case '>':
        if (matchChar('>')) {
            if (matchChar('>'))
                tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
            else
                tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
        } else {
            tp->type = matchChar('=') ? TOK_GE : TOK_GT;
        }
        goto out;

      case '*':
        tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
        goto out;

      case '/':
        // Look for a single-line comment.
        if (matchChar('/')) {
            c = peekChar();
            if (c == '@' || c == '#') {
                // The '@' form triggers the deprecation warning.
                bool shouldWarn = getChar() == '@';
                if (!getDirectives(false, shouldWarn))
                    goto error;
            }

          skipline:
            // Also entered for the HTML-style "<!--" and "-->" comments.
            while ((c = getChar()) != EOF && c != '\n')
                continue;
            ungetChar(c);
            // A comment produces no token: give back the slot that
            // newToken() reserved before scanning again.
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a multi-line comment.
        if (matchChar('*')) {
            unsigned linenoBefore = lineno;
            while ((c = getChar()) != EOF &&
                   !(c == '*' && matchChar('/'))) {
                if (c == '@' || c == '#') {
                    bool shouldWarn = c == '@';
                    if (!getDirectives(true, shouldWarn))
                        goto error;
                }
            }
            if (c == EOF) {
                reportError(JSMSG_UNTERMINATED_COMMENT);
                goto error;
            }
            // Only update the EOL flags if the comment spanned a line break.
            if (linenoBefore != lineno)
                updateFlagsForEOL();
            // A comment produces no token: give back the slot that
            // newToken() reserved before scanning again.
            cursor = (cursor - 1) & ntokensMask;
            goto retry;
        }

        // Look for a regexp.
        if (modifier == Operand) {
            tokenbuf.clear();

            bool inCharClass = false;
            for (;;) {
                c = getChar();
                if (c == '\\') {
                    // Keep the backslash and take the next char verbatim.
                    if (!tokenbuf.append(c))
                        goto error;
                    c = getChar();
                } else if (c == '[') {
                    inCharClass = true;
                } else if (c == ']') {
                    inCharClass = false;
                } else if (c == '/' && !inCharClass) {
                    // For compat with IE, allow unescaped / in char classes.
                    break;
                }
                if (c == '\n' || c == EOF) {
                    ungetChar(c);
                    reportError(JSMSG_UNTERMINATED_REGEXP);
                    goto error;
                }
                if (!tokenbuf.append(c))
                    goto error;
            }

            // Collect the flags; each flag may appear at most once.
            RegExpFlag reflags = NoFlags;
            unsigned length = tokenbuf.length() + 1;
            while (true) {
                c = peekChar();
                if (c == 'g' && !(reflags & GlobalFlag))
                    reflags = RegExpFlag(reflags | GlobalFlag);
                else if (c == 'i' && !(reflags & IgnoreCaseFlag))
                    reflags = RegExpFlag(reflags | IgnoreCaseFlag);
                else if (c == 'm' && !(reflags & MultilineFlag))
                    reflags = RegExpFlag(reflags | MultilineFlag);
                else if (c == 'y' && !(reflags & StickyFlag))
                    reflags = RegExpFlag(reflags | StickyFlag);
                else
                    break;
                getChar();
                length++;
            }

            // A letter here is an unknown or repeated flag.
            c = peekChar();
            if (JS7_ISLET(c)) {
                char buf[2] = { '\0', '\0' };
                // NOTE(review): presumably this moves the reported position
                // to the offending flag char -- confirm against reportError().
                tp->pos.begin += length + 1;
                buf[0] = char(c);
                reportError(JSMSG_BAD_REGEXP_FLAG, buf);
                (void) getChar();
                goto error;
            }
            tp->type = TOK_REGEXP;
            tp->setRegExpFlags(reflags);
            goto out;
        }

        tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
        goto out;

      case '%':
        tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
        goto out;

      case '-':
        if (matchChar('-')) {
            // "-->" is skipped as a comment to end of line, but only when
            // flags.isDirtyLine is not set.
            if (peekChar() == '>' && !flags.isDirtyLine)
                goto skipline;
            tp->type = TOK_DEC;
        } else {
            tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
        }
        goto out;

      badchar:
      default:
        reportError(JSMSG_ILLEGAL_CHARACTER);
        goto error;
    }

    MOZ_ASSUME_UNREACHABLE("should have jumped to |out| or |error|");

  out:
    flags.isDirtyLine = true;
    tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
    JS_ASSERT(IsTokenSane(tp));
    return tp->type;

  error:
    flags.isDirtyLine = true;
    tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
    tp->type = TOK_ERROR;
    JS_ASSERT(IsTokenSane(tp));
    onError();
    return TOK_ERROR;
}
|
1673 |
|
1674 void |
|
1675 TokenStream::onError() |
|
1676 { |
|
1677 flags.hadError = true; |
|
1678 #ifdef DEBUG |
|
1679 // Poisoning userbuf on error establishes an invariant: once an erroneous |
|
1680 // token has been seen, userbuf will not be consulted again. This is true |
|
1681 // because the parser will either (a) deal with the TOK_ERROR token by |
|
1682 // aborting parsing immediately; or (b) if the TOK_ERROR token doesn't |
|
1683 // match what it expected, it will unget the token, and the next getToken() |
|
1684 // call will immediately return the just-gotten TOK_ERROR token again |
|
1685 // without consulting userbuf, thanks to the lookahead buffer. |
|
1686 userbuf.poison(); |
|
1687 #endif |
|
1688 } |
|
1689 |
|
1690 JS_FRIEND_API(int) |
|
1691 js_fgets(char *buf, int size, FILE *file) |
|
1692 { |
|
1693 int n, i, c; |
|
1694 bool crflag; |
|
1695 |
|
1696 n = size - 1; |
|
1697 if (n < 0) |
|
1698 return -1; |
|
1699 |
|
1700 crflag = false; |
|
1701 for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) { |
|
1702 buf[i] = c; |
|
1703 if (c == '\n') { // any \n ends a line |
|
1704 i++; // keep the \n; we know there is room for \0 |
|
1705 break; |
|
1706 } |
|
1707 if (crflag) { // \r not followed by \n ends line at the \r |
|
1708 ungetc(c, file); |
|
1709 break; // and overwrite c in buf with \0 |
|
1710 } |
|
1711 crflag = (c == '\r'); |
|
1712 } |
|
1713 |
|
1714 buf[i] = '\0'; |
|
1715 return i; |
|
1716 } |
|
1717 |
|
#ifdef DEBUG
// Debug-only helper returning the enumerator name of |tt| as a C string, or
// "<bad TokenKind>" for TOK_LIMIT and any value without a case below.
const char *
TokenKindToString(TokenKind tt)
{
    // Expands to a case label that returns the spelling of its own enumerator.
#define TOKEN_KIND_NAME(kind) case kind: return #kind;

    switch (tt) {
      TOKEN_KIND_NAME(TOK_ERROR)
      TOKEN_KIND_NAME(TOK_EOF)
      TOKEN_KIND_NAME(TOK_EOL)
      TOKEN_KIND_NAME(TOK_SEMI)
      TOKEN_KIND_NAME(TOK_COMMA)
      TOKEN_KIND_NAME(TOK_HOOK)
      TOKEN_KIND_NAME(TOK_COLON)
      TOKEN_KIND_NAME(TOK_OR)
      TOKEN_KIND_NAME(TOK_AND)
      TOKEN_KIND_NAME(TOK_BITOR)
      TOKEN_KIND_NAME(TOK_BITXOR)
      TOKEN_KIND_NAME(TOK_BITAND)
      TOKEN_KIND_NAME(TOK_ADD)
      TOKEN_KIND_NAME(TOK_SUB)
      TOKEN_KIND_NAME(TOK_MUL)
      TOKEN_KIND_NAME(TOK_DIV)
      TOKEN_KIND_NAME(TOK_MOD)
      TOKEN_KIND_NAME(TOK_INC)
      TOKEN_KIND_NAME(TOK_DEC)
      TOKEN_KIND_NAME(TOK_DOT)
      TOKEN_KIND_NAME(TOK_TRIPLEDOT)
      TOKEN_KIND_NAME(TOK_LB)
      TOKEN_KIND_NAME(TOK_RB)
      TOKEN_KIND_NAME(TOK_LC)
      TOKEN_KIND_NAME(TOK_RC)
      TOKEN_KIND_NAME(TOK_LP)
      TOKEN_KIND_NAME(TOK_RP)
      TOKEN_KIND_NAME(TOK_ARROW)
      TOKEN_KIND_NAME(TOK_NAME)
      TOKEN_KIND_NAME(TOK_NUMBER)
      TOKEN_KIND_NAME(TOK_STRING)
      TOKEN_KIND_NAME(TOK_REGEXP)
      TOKEN_KIND_NAME(TOK_TRUE)
      TOKEN_KIND_NAME(TOK_FALSE)
      TOKEN_KIND_NAME(TOK_NULL)
      TOKEN_KIND_NAME(TOK_THIS)
      TOKEN_KIND_NAME(TOK_FUNCTION)
      TOKEN_KIND_NAME(TOK_IF)
      TOKEN_KIND_NAME(TOK_ELSE)
      TOKEN_KIND_NAME(TOK_SWITCH)
      TOKEN_KIND_NAME(TOK_CASE)
      TOKEN_KIND_NAME(TOK_DEFAULT)
      TOKEN_KIND_NAME(TOK_WHILE)
      TOKEN_KIND_NAME(TOK_DO)
      TOKEN_KIND_NAME(TOK_FOR)
      TOKEN_KIND_NAME(TOK_BREAK)
      TOKEN_KIND_NAME(TOK_CONTINUE)
      TOKEN_KIND_NAME(TOK_IN)
      TOKEN_KIND_NAME(TOK_VAR)
      TOKEN_KIND_NAME(TOK_CONST)
      TOKEN_KIND_NAME(TOK_WITH)
      TOKEN_KIND_NAME(TOK_RETURN)
      TOKEN_KIND_NAME(TOK_NEW)
      TOKEN_KIND_NAME(TOK_DELETE)
      TOKEN_KIND_NAME(TOK_TRY)
      TOKEN_KIND_NAME(TOK_CATCH)
      TOKEN_KIND_NAME(TOK_FINALLY)
      TOKEN_KIND_NAME(TOK_THROW)
      TOKEN_KIND_NAME(TOK_INSTANCEOF)
      TOKEN_KIND_NAME(TOK_DEBUGGER)
      TOKEN_KIND_NAME(TOK_YIELD)
      TOKEN_KIND_NAME(TOK_LET)
      TOKEN_KIND_NAME(TOK_RESERVED)
      TOKEN_KIND_NAME(TOK_STRICT_RESERVED)
      TOKEN_KIND_NAME(TOK_STRICTEQ)
      TOKEN_KIND_NAME(TOK_EQ)
      TOKEN_KIND_NAME(TOK_STRICTNE)
      TOKEN_KIND_NAME(TOK_NE)
      TOKEN_KIND_NAME(TOK_TYPEOF)
      TOKEN_KIND_NAME(TOK_VOID)
      TOKEN_KIND_NAME(TOK_NOT)
      TOKEN_KIND_NAME(TOK_BITNOT)
      TOKEN_KIND_NAME(TOK_LT)
      TOKEN_KIND_NAME(TOK_LE)
      TOKEN_KIND_NAME(TOK_GT)
      TOKEN_KIND_NAME(TOK_GE)
      TOKEN_KIND_NAME(TOK_LSH)
      TOKEN_KIND_NAME(TOK_RSH)
      TOKEN_KIND_NAME(TOK_URSH)
      TOKEN_KIND_NAME(TOK_ASSIGN)
      TOKEN_KIND_NAME(TOK_ADDASSIGN)
      TOKEN_KIND_NAME(TOK_SUBASSIGN)
      TOKEN_KIND_NAME(TOK_BITORASSIGN)
      TOKEN_KIND_NAME(TOK_BITXORASSIGN)
      TOKEN_KIND_NAME(TOK_BITANDASSIGN)
      TOKEN_KIND_NAME(TOK_LSHASSIGN)
      TOKEN_KIND_NAME(TOK_RSHASSIGN)
      TOKEN_KIND_NAME(TOK_URSHASSIGN)
      TOKEN_KIND_NAME(TOK_MULASSIGN)
      TOKEN_KIND_NAME(TOK_DIVASSIGN)
      TOKEN_KIND_NAME(TOK_MODASSIGN)
      TOKEN_KIND_NAME(TOK_EXPORT)
      TOKEN_KIND_NAME(TOK_IMPORT)

      // TOK_LIMIT is a sentinel, not a real token kind.
      case TOK_LIMIT:
        break;
    }

#undef TOKEN_KIND_NAME

    return "<bad TokenKind>";
}
#endif