Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 // JS lexical scanner.
9 #include "frontend/TokenStream.h"
11 #include "mozilla/PodOperations.h"
13 #include <ctype.h>
14 #include <stdarg.h>
15 #include <stdio.h>
16 #include <string.h>
18 #include "jsatom.h"
19 #include "jscntxt.h"
20 #include "jsexn.h"
21 #include "jsnum.h"
22 #include "jsworkers.h"
24 #include "frontend/BytecodeCompiler.h"
25 #include "js/CharacterEncoding.h"
26 #include "vm/Keywords.h"
27 #include "vm/StringBuffer.h"
29 using namespace js;
30 using namespace js::frontend;
31 using namespace js::unicode;
33 using mozilla::Maybe;
34 using mozilla::PodAssign;
35 using mozilla::PodCopy;
36 using mozilla::PodZero;
38 struct KeywordInfo {
39 const char *chars; // C string with keyword text
40 TokenKind tokentype;
41 JSVersion version;
42 };
44 static const KeywordInfo keywords[] = {
45 #define KEYWORD_INFO(keyword, name, type, version) \
46 {js_##keyword##_str, type, version},
47 FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
48 #undef KEYWORD_INFO
49 };
51 // Returns a KeywordInfo for the specified characters, or nullptr if the string
52 // is not a keyword.
53 static const KeywordInfo *
54 FindKeyword(const jschar *s, size_t length)
55 {
56 JS_ASSERT(length != 0);
58 size_t i;
59 const KeywordInfo *kw;
60 const char *chars;
62 #define JSKW_LENGTH() length
63 #define JSKW_AT(column) s[column]
64 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
65 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
66 #define JSKW_NO_MATCH() goto no_match;
67 #include "jsautokw.h"
68 #undef JSKW_NO_MATCH
69 #undef JSKW_TEST_GUESS
70 #undef JSKW_GOT_MATCH
71 #undef JSKW_AT
72 #undef JSKW_LENGTH
74 got_match:
75 return &keywords[i];
77 test_guess:
78 kw = &keywords[i];
79 chars = kw->chars;
80 do {
81 if (*s++ != (unsigned char)(*chars++))
82 goto no_match;
83 } while (--length != 0);
84 return kw;
86 no_match:
87 return nullptr;
88 }
90 bool
91 frontend::IsIdentifier(JSLinearString *str)
92 {
93 const jschar *chars = str->chars();
94 size_t length = str->length();
96 if (length == 0)
97 return false;
98 jschar c = *chars;
99 if (!IsIdentifierStart(c))
100 return false;
101 const jschar *end = chars + length;
102 while (++chars != end) {
103 c = *chars;
104 if (!IsIdentifierPart(c))
105 return false;
106 }
107 return true;
108 }
110 bool
111 frontend::IsKeyword(JSLinearString *str)
112 {
113 return FindKeyword(str->chars(), str->length()) != nullptr;
114 }
116 TokenStream::SourceCoords::SourceCoords(ExclusiveContext *cx, uint32_t ln)
117 : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
118 {
119 // This is actually necessary! Removing it causes compile errors on
120 // GCC and clang. You could try declaring this:
121 //
122 // const uint32_t TokenStream::SourceCoords::MAX_PTR;
123 //
124 // which fixes the GCC/clang error, but causes bustage on Windows. Sigh.
125 //
126 uint32_t maxPtr = MAX_PTR;
128 // The first line begins at buffer offset 0. MAX_PTR is the sentinel. The
129 // appends cannot fail because |lineStartOffsets_| has statically-allocated
130 // elements.
131 JS_ASSERT(lineStartOffsets_.capacity() >= 2);
132 (void)lineStartOffsets_.reserve(2);
133 lineStartOffsets_.infallibleAppend(0);
134 lineStartOffsets_.infallibleAppend(maxPtr);
135 }
137 MOZ_ALWAYS_INLINE void
138 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
139 {
140 uint32_t lineIndex = lineNumToIndex(lineNum);
141 uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
143 JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
145 if (lineIndex == sentinelIndex) {
146 // We haven't seen this newline before. Update lineStartOffsets_.
147 // We ignore any failures due to OOM -- because we always have a
148 // sentinel node, it'll just be like the newline wasn't present. I.e.
149 // the line numbers will be wrong, but the code won't crash or anything
150 // like that.
151 lineStartOffsets_[lineIndex] = lineStartOffset;
153 uint32_t maxPtr = MAX_PTR;
154 (void)lineStartOffsets_.append(maxPtr);
156 } else {
157 // We have seen this newline before (and ungot it). Do nothing (other
158 // than checking it hasn't mysteriously changed).
159 JS_ASSERT(lineStartOffsets_[lineIndex] == lineStartOffset);
160 }
161 }
163 MOZ_ALWAYS_INLINE bool
164 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other)
165 {
166 JS_ASSERT(lineStartOffsets_.back() == MAX_PTR);
167 JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
169 if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
170 return true;
172 uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
173 lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
175 for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
176 if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
177 return false;
178 }
179 return true;
180 }
182 MOZ_ALWAYS_INLINE uint32_t
183 TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
184 {
185 uint32_t iMin, iMax, iMid;
187 if (lineStartOffsets_[lastLineIndex_] <= offset) {
188 // If we reach here, offset is on a line the same as or higher than
189 // last time. Check first for the +0, +1, +2 cases, because they
190 // typically cover 85--98% of cases.
191 if (offset < lineStartOffsets_[lastLineIndex_ + 1])
192 return lastLineIndex_; // lineIndex is same as last time
194 // If we reach here, there must be at least one more entry (plus the
195 // sentinel). Try it.
196 lastLineIndex_++;
197 if (offset < lineStartOffsets_[lastLineIndex_ + 1])
198 return lastLineIndex_; // lineIndex is one higher than last time
200 // The same logic applies here.
201 lastLineIndex_++;
202 if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
203 return lastLineIndex_; // lineIndex is two higher than last time
204 }
206 // No luck. Oh well, we have a better-than-default starting point for
207 // the binary search.
208 iMin = lastLineIndex_ + 1;
209 JS_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel
211 } else {
212 iMin = 0;
213 }
215 // This is a binary search with deferred detection of equality, which was
216 // marginally faster in this case than a standard binary search.
217 // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
218 // want one before that.
219 iMax = lineStartOffsets_.length() - 2;
220 while (iMax > iMin) {
221 iMid = iMin + (iMax - iMin) / 2;
222 if (offset >= lineStartOffsets_[iMid + 1])
223 iMin = iMid + 1; // offset is above lineStartOffsets_[iMid]
224 else
225 iMax = iMid; // offset is below or within lineStartOffsets_[iMid]
226 }
227 JS_ASSERT(iMax == iMin);
228 JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
229 lastLineIndex_ = iMin;
230 return iMin;
231 }
233 uint32_t
234 TokenStream::SourceCoords::lineNum(uint32_t offset) const
235 {
236 uint32_t lineIndex = lineIndexOf(offset);
237 return lineIndexToNum(lineIndex);
238 }
240 uint32_t
241 TokenStream::SourceCoords::columnIndex(uint32_t offset) const
242 {
243 uint32_t lineIndex = lineIndexOf(offset);
244 uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
245 JS_ASSERT(offset >= lineStartOffset);
246 return offset - lineStartOffset;
247 }
249 void
250 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum,
251 uint32_t *columnIndex) const
252 {
253 uint32_t lineIndex = lineIndexOf(offset);
254 *lineNum = lineIndexToNum(lineIndex);
255 uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
256 JS_ASSERT(offset >= lineStartOffset);
257 *columnIndex = offset - lineStartOffset;
258 }
260 #ifdef _MSC_VER
261 #pragma warning(push)
262 #pragma warning(disable:4351)
263 #endif
265 // Initialize members that aren't initialized in |init|.
266 TokenStream::TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
267 const jschar *base, size_t length, StrictModeGetter *smg)
268 : srcCoords(cx, options.lineno),
269 options_(options),
270 tokens(),
271 cursor(),
272 lookahead(),
273 lineno(options.lineno),
274 flags(),
275 linebase(base - options.column),
276 prevLinebase(nullptr),
277 userbuf(cx, base - options.column, length + options.column), // See comment below
278 filename(options.filename()),
279 displayURL_(nullptr),
280 sourceMapURL_(nullptr),
281 tokenbuf(cx),
282 cx(cx),
283 originPrincipals(options.originPrincipals(cx)),
284 strictModeGetter(smg)
285 {
286 // The caller must ensure that a reference is held on the supplied principals
287 // throughout compilation.
288 JS_ASSERT_IF(originPrincipals, originPrincipals->refcount > 0);
290 // Column numbers are computed as offsets from the current line's base, so the
291 // initial line's base must be included in the buffer. linebase and userbuf
292 // were adjusted above, and if we are starting tokenization part way through
293 // this line then adjust the next character.
294 userbuf.setAddressOfNextRawChar(base);
296 // Nb: the following tables could be static, but initializing them here is
297 // much easier. Don't worry, the time to initialize them for each
298 // TokenStream is trivial. See bug 639420.
300 // See getChar() for an explanation of maybeEOL[].
301 memset(maybeEOL, 0, sizeof(maybeEOL));
302 maybeEOL[unsigned('\n')] = true;
303 maybeEOL[unsigned('\r')] = true;
304 maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
305 maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
307 // See getTokenInternal() for an explanation of maybeStrSpecial[].
308 memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
309 maybeStrSpecial[unsigned('"')] = true;
310 maybeStrSpecial[unsigned('\'')] = true;
311 maybeStrSpecial[unsigned('\\')] = true;
312 maybeStrSpecial[unsigned('\n')] = true;
313 maybeStrSpecial[unsigned('\r')] = true;
314 maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
315 maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
316 maybeStrSpecial[unsigned(EOF & 0xff)] = true;
318 // See Parser::assignExpr() for an explanation of isExprEnding[].
319 memset(isExprEnding, 0, sizeof(isExprEnding));
320 isExprEnding[TOK_COMMA] = 1;
321 isExprEnding[TOK_SEMI] = 1;
322 isExprEnding[TOK_COLON] = 1;
323 isExprEnding[TOK_RP] = 1;
324 isExprEnding[TOK_RB] = 1;
325 isExprEnding[TOK_RC] = 1;
326 }
328 #ifdef _MSC_VER
329 #pragma warning(pop)
330 #endif
332 TokenStream::~TokenStream()
333 {
334 js_free(displayURL_);
335 js_free(sourceMapURL_);
337 JS_ASSERT_IF(originPrincipals, originPrincipals->refcount);
338 }
// fast_getc names the cheapest getc variant the build configuration offers:
// getc_unlocked, then _getc_nolock, then plain getc.
#ifdef HAVE_GETC_UNLOCKED
# define fast_getc getc_unlocked
#else
# ifdef HAVE__GETC_NOLOCK
#  define fast_getc _getc_nolock
# else
#  define fast_getc getc
# endif
#endif
349 MOZ_ALWAYS_INLINE void
350 TokenStream::updateLineInfoForEOL()
351 {
352 prevLinebase = linebase;
353 linebase = userbuf.addressOfNextRawChar();
354 lineno++;
355 srcCoords.add(lineno, linebase - userbuf.base());
356 }
358 MOZ_ALWAYS_INLINE void
359 TokenStream::updateFlagsForEOL()
360 {
361 flags.isDirtyLine = false;
362 }
364 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.
365 int32_t
366 TokenStream::getChar()
367 {
368 int32_t c;
369 if (MOZ_LIKELY(userbuf.hasRawChars())) {
370 c = userbuf.getRawChar();
372 // Normalize the jschar if it was a newline. We need to detect any of
373 // these four characters: '\n' (0x000a), '\r' (0x000d),
374 // LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each
375 // one in turn is slow, so we use a single probabilistic check, and if
376 // that succeeds, test for them individually.
377 //
378 // We use the bottom 8 bits to index into a lookup table, succeeding
379 // when d&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which
380 // are by the far the most common) this gives false positives for '('
381 // (0x0028) and ')' (0x0029). We could avoid those by incorporating
382 // the 13th bit of d into the lookup, but that requires extra shifting
383 // and masking and isn't worthwhile. See TokenStream::TokenStream()
384 // for the initialization of the relevant entries in the table.
385 if (MOZ_UNLIKELY(maybeEOL[c & 0xff])) {
386 if (c == '\n')
387 goto eol;
388 if (c == '\r') {
389 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
390 if (userbuf.hasRawChars())
391 userbuf.matchRawChar('\n');
392 goto eol;
393 }
394 if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
395 goto eol;
396 }
397 return c;
398 }
400 flags.isEOF = true;
401 return EOF;
403 eol:
404 updateLineInfoForEOL();
405 return '\n';
406 }
408 // This gets the next char. It does nothing special with EOL sequences, not
409 // even updating the line counters. It can be used safely if (a) the
410 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
411 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used
412 // before it's ungotten.
413 int32_t
414 TokenStream::getCharIgnoreEOL()
415 {
416 if (MOZ_LIKELY(userbuf.hasRawChars()))
417 return userbuf.getRawChar();
419 flags.isEOF = true;
420 return EOF;
421 }
423 void
424 TokenStream::ungetChar(int32_t c)
425 {
426 if (c == EOF)
427 return;
428 JS_ASSERT(!userbuf.atStart());
429 userbuf.ungetRawChar();
430 if (c == '\n') {
431 #ifdef DEBUG
432 int32_t c2 = userbuf.peekRawChar();
433 JS_ASSERT(TokenBuf::isRawEOLChar(c2));
434 #endif
436 // If it's a \r\n sequence, also unget the \r.
437 if (!userbuf.atStart())
438 userbuf.matchRawCharBackwards('\r');
440 JS_ASSERT(prevLinebase); // we should never get more than one EOL char
441 linebase = prevLinebase;
442 prevLinebase = nullptr;
443 lineno--;
444 } else {
445 JS_ASSERT(userbuf.peekRawChar() == c);
446 }
447 }
449 void
450 TokenStream::ungetCharIgnoreEOL(int32_t c)
451 {
452 if (c == EOF)
453 return;
454 JS_ASSERT(!userbuf.atStart());
455 userbuf.ungetRawChar();
456 }
458 // Return true iff |n| raw characters can be read from this without reading past
459 // EOF or a newline, and copy those characters into |cp| if so. The characters
460 // are not consumed: use skipChars(n) to do so after checking that the consumed
461 // characters had appropriate values.
462 bool
463 TokenStream::peekChars(int n, jschar *cp)
464 {
465 int i, j;
466 int32_t c;
468 for (i = 0; i < n; i++) {
469 c = getCharIgnoreEOL();
470 if (c == EOF)
471 break;
472 if (c == '\n') {
473 ungetCharIgnoreEOL(c);
474 break;
475 }
476 cp[i] = jschar(c);
477 }
478 for (j = i - 1; j >= 0; j--)
479 ungetCharIgnoreEOL(cp[j]);
480 return i == n;
481 }
483 const jschar *
484 TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max)
485 {
486 JS_ASSERT(base_ <= p && p <= limit_);
488 size_t n = 0;
489 while (true) {
490 if (p >= limit_)
491 break;
492 if (n >= max)
493 break;
494 if (TokenBuf::isRawEOLChar(*p++))
495 break;
496 n++;
497 }
498 return p;
499 }
501 void
502 TokenStream::advance(size_t position)
503 {
504 const jschar *end = userbuf.base() + position;
505 while (userbuf.addressOfNextRawChar() < end)
506 getChar();
508 Token *cur = &tokens[cursor];
509 cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base();
510 cur->type = TOK_ERROR;
511 lookahead = 0;
512 }
514 void
515 TokenStream::tell(Position *pos)
516 {
517 pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
518 pos->flags = flags;
519 pos->lineno = lineno;
520 pos->linebase = linebase;
521 pos->prevLinebase = prevLinebase;
522 pos->lookahead = lookahead;
523 pos->currentToken = currentToken();
524 for (unsigned i = 0; i < lookahead; i++)
525 pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];
526 }
528 void
529 TokenStream::seek(const Position &pos)
530 {
531 userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
532 flags = pos.flags;
533 lineno = pos.lineno;
534 linebase = pos.linebase;
535 prevLinebase = pos.prevLinebase;
536 lookahead = pos.lookahead;
538 tokens[cursor] = pos.currentToken;
539 for (unsigned i = 0; i < lookahead; i++)
540 tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];
541 }
543 bool
544 TokenStream::seek(const Position &pos, const TokenStream &other)
545 {
546 if (!srcCoords.fill(other.srcCoords))
547 return false;
548 seek(pos);
549 return true;
550 }
552 bool
553 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
554 va_list args)
555 {
556 // In strict mode code, this is an error, not merely a warning.
557 unsigned flags = JSREPORT_STRICT;
558 if (strictMode)
559 flags |= JSREPORT_ERROR;
560 else if (options().extraWarningsOption)
561 flags |= JSREPORT_WARNING;
562 else
563 return true;
565 return reportCompileErrorNumberVA(offset, flags, errorNumber, args);
566 }
568 void
569 CompileError::throwError(JSContext *cx)
570 {
571 // If there's a runtime exception type associated with this error
572 // number, set that as the pending exception. For errors occuring at
573 // compile time, this is very likely to be a JSEXN_SYNTAXERR.
574 //
575 // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
576 // flag will be set in report.flags. Proper behavior for an error
577 // reporter is to ignore a report with this flag for all but top-level
578 // compilation errors. The exception will remain pending, and so long
579 // as the non-top-level "load", "eval", or "compile" native function
580 // returns false, the top-level reporter will eventually receive the
581 // uncaught exception report.
582 if (!js_ErrorToException(cx, message, &report, nullptr, nullptr))
583 CallErrorReporter(cx, message, &report);
584 }
586 CompileError::~CompileError()
587 {
588 js_free((void*)report.uclinebuf);
589 js_free((void*)report.linebuf);
590 js_free((void*)report.ucmessage);
591 js_free(message);
592 message = nullptr;
594 if (report.messageArgs) {
595 if (argumentsType == ArgumentsAreASCII) {
596 unsigned i = 0;
597 while (report.messageArgs[i])
598 js_free((void*)report.messageArgs[i++]);
599 }
600 js_free(report.messageArgs);
601 }
603 PodZero(&report);
604 }
606 bool
607 TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
608 va_list args)
609 {
610 bool warning = JSREPORT_IS_WARNING(flags);
612 if (warning && options().werrorOption) {
613 flags &= ~JSREPORT_WARNING;
614 warning = false;
615 }
617 // On the main thread, report the error immediately. When compiling off
618 // thread, save the error so that the main thread can report it later.
619 CompileError tempErr;
620 CompileError &err = cx->isJSContext() ? tempErr : cx->addPendingCompileError();
622 err.report.flags = flags;
623 err.report.errorNumber = errorNumber;
624 err.report.filename = filename;
625 err.report.originPrincipals = originPrincipals;
626 if (offset == NoOffset) {
627 err.report.lineno = 0;
628 err.report.column = 0;
629 } else {
630 err.report.lineno = srcCoords.lineNum(offset);
631 err.report.column = srcCoords.columnIndex(offset);
632 }
634 err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;
636 if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, nullptr, errorNumber, &err.message,
637 &err.report, err.argumentsType, args))
638 {
639 return false;
640 }
642 // Given a token, T, that we want to complain about: if T's (starting)
643 // lineno doesn't match TokenStream's lineno, that means we've scanned past
644 // the line that T starts on, which makes it hard to print some or all of
645 // T's (starting) line for context.
646 //
647 // So we don't even try, leaving report.linebuf and friends zeroed. This
648 // means that any error involving a multi-line token (e.g. an unterminated
649 // multi-line string literal) won't have a context printed.
650 if (offset != NoOffset && err.report.lineno == lineno) {
651 const jschar *tokenStart = userbuf.base() + offset;
653 // We show only a portion (a "window") of the line around the erroneous
654 // token -- the first char in the token, plus |windowRadius| chars
655 // before it and |windowRadius - 1| chars after it. This is because
656 // lines can be very long and printing the whole line is (a) not that
657 // helpful, and (b) can waste a lot of memory. See bug 634444.
658 static const size_t windowRadius = 60;
660 // Truncate at the front if necessary.
661 const jschar *windowBase = (linebase + windowRadius < tokenStart)
662 ? tokenStart - windowRadius
663 : linebase;
664 uint32_t windowOffset = tokenStart - windowBase;
666 // Find EOL, or truncate at the back if necessary.
667 const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius);
668 size_t windowLength = windowLimit - windowBase;
669 JS_ASSERT(windowLength <= windowRadius * 2);
671 // Create the windowed strings.
672 StringBuffer windowBuf(cx);
673 if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0))
674 return false;
676 // Unicode and char versions of the window into the offending source
677 // line, without final \n.
678 err.report.uclinebuf = windowBuf.extractWellSized();
679 if (!err.report.uclinebuf)
680 return false;
681 TwoByteChars tbchars(err.report.uclinebuf, windowLength);
682 err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str();
683 if (!err.report.linebuf)
684 return false;
686 err.report.tokenptr = err.report.linebuf + windowOffset;
687 err.report.uctokenptr = err.report.uclinebuf + windowOffset;
688 }
690 if (cx->isJSContext())
691 err.throwError(cx->asJSContext());
693 return warning;
694 }
696 bool
697 TokenStream::reportStrictModeError(unsigned errorNumber, ...)
698 {
699 va_list args;
700 va_start(args, errorNumber);
701 bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
702 errorNumber, args);
703 va_end(args);
704 return result;
705 }
707 bool
708 TokenStream::reportError(unsigned errorNumber, ...)
709 {
710 va_list args;
711 va_start(args, errorNumber);
712 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
713 args);
714 va_end(args);
715 return result;
716 }
718 bool
719 TokenStream::reportWarning(unsigned errorNumber, ...)
720 {
721 va_list args;
722 va_start(args, errorNumber);
723 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
724 errorNumber, args);
725 va_end(args);
726 return result;
727 }
729 bool
730 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
731 {
732 if (!options().extraWarningsOption)
733 return true;
735 return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
736 }
738 void
739 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
740 {
741 va_list args;
742 va_start(args, errorNumber);
743 reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, args);
744 va_end(args);
745 }
747 // We have encountered a '\': check for a Unicode escape sequence after it.
748 // Return 'true' and the character code value (by value) if we found a
749 // Unicode escape sequence. Otherwise, return 'false'. In both cases, do not
750 // advance along the buffer.
751 bool
752 TokenStream::peekUnicodeEscape(int *result)
753 {
754 jschar cp[5];
756 if (peekChars(5, cp) && cp[0] == 'u' &&
757 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
758 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
759 {
760 *result = (((((JS7_UNHEX(cp[1]) << 4)
761 + JS7_UNHEX(cp[2])) << 4)
762 + JS7_UNHEX(cp[3])) << 4)
763 + JS7_UNHEX(cp[4]);
764 return true;
765 }
766 return false;
767 }
769 bool
770 TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
771 {
772 if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
773 skipChars(5);
774 return true;
775 }
776 return false;
777 }
779 bool
780 TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
781 {
782 if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
783 skipChars(5);
784 return true;
785 }
786 return false;
787 }
789 // Helper function which returns true if the first length(q) characters in p are
790 // the same as the characters in q.
791 static bool
792 CharsMatch(const jschar *p, const char *q) {
793 while (*q) {
794 if (*p++ != *q++)
795 return false;
796 }
797 return true;
798 }
800 bool
801 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
802 {
803 // Match directive comments used in debugging, such as "//# sourceURL" and
804 // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
805 //
806 // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
807 // line comments containing a source mapping URL inside a multiline
808 // comment. To avoid potentially expensive lookahead and backtracking, we
809 // only check for this case if we encounter a '#' character.
811 if (!getDisplayURL(isMultiline, shouldWarnDeprecated))
812 return false;
813 if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))
814 return false;
816 return true;
817 }
819 bool
820 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
821 const char *directive, int directiveLength,
822 const char *errorMsgPragma, jschar **destination) {
823 JS_ASSERT(directiveLength <= 18);
824 jschar peeked[18];
825 int32_t c;
827 if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
828 if (shouldWarnDeprecated &&
829 !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
830 return false;
832 skipChars(directiveLength);
833 tokenbuf.clear();
835 while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
836 getChar();
837 // Debugging directives can occur in both single- and multi-line
838 // comments. If we're currently inside a multi-line comment, we also
839 // need to recognize multi-line comment terminators.
840 if (isMultiline && c == '*' && peekChar() == '/') {
841 ungetChar('*');
842 break;
843 }
844 tokenbuf.append(c);
845 }
847 if (tokenbuf.empty())
848 // The directive's URL was missing, but this is not quite an
849 // exception that we should stop and drop everything for.
850 return true;
852 size_t length = tokenbuf.length();
854 js_free(*destination);
855 *destination = cx->pod_malloc<jschar>(length + 1);
856 if (!*destination)
857 return false;
859 PodCopy(*destination, tokenbuf.begin(), length);
860 (*destination)[length] = '\0';
861 }
863 return true;
864 }
866 bool
867 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
868 {
869 // Match comments of the form "//# sourceURL=<url>" or
870 // "/\* //# sourceURL=<url> *\/"
871 //
872 // Note that while these are labeled "sourceURL" in the source text,
873 // internally we refer to it as a "displayURL" to distinguish what the
874 // developer would like to refer to the source as from the source's actual
875 // URL.
877 return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,
878 "sourceURL", &displayURL_);
879 }
881 bool
882 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
883 {
884 // Match comments of the form "//# sourceMappingURL=<url>" or
885 // "/\* //# sourceMappingURL=<url> *\/"
887 return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,
888 "sourceMappingURL", &sourceMapURL_);
889 }
891 MOZ_ALWAYS_INLINE Token *
892 TokenStream::newToken(ptrdiff_t adjust)
893 {
894 cursor = (cursor + 1) & ntokensMask;
895 Token *tp = &tokens[cursor];
896 tp->pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base();
898 // NOTE: tp->pos.end is not set until the very end of getTokenInternal().
899 MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));
901 return tp;
902 }
904 MOZ_ALWAYS_INLINE JSAtom *
905 TokenStream::atomize(ExclusiveContext *cx, CharBuffer &cb)
906 {
907 return AtomizeChars(cx, cb.begin(), cb.length());
908 }
#ifdef DEBUG
// Debug-only sanity check: the token's kind must lie in [TOK_ERROR,
// TOK_LIMIT) and its end must not precede its beginning.
static bool
IsTokenSane(Token *tp)
{
    // Nb: TOK_EOL should never be used in an actual Token; it should only be
    // returned as a TokenKind from peekTokenSameLine().
    const bool kindIsUsable = tp->type >= TOK_ERROR &&
                              tp->type < TOK_LIMIT &&
                              tp->type != TOK_EOL;

    return kindIsUsable && tp->pos.begin <= tp->pos.end;
}
#endif
926 bool
927 TokenStream::putIdentInTokenbuf(const jschar *identStart)
928 {
929 int32_t c, qc;
930 const jschar *tmp = userbuf.addressOfNextRawChar();
931 userbuf.setAddressOfNextRawChar(identStart);
933 tokenbuf.clear();
934 for (;;) {
935 c = getCharIgnoreEOL();
936 if (!IsIdentifierPart(c)) {
937 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
938 break;
939 c = qc;
940 }
941 if (!tokenbuf.append(c)) {
942 userbuf.setAddressOfNextRawChar(tmp);
943 return false;
944 }
945 }
946 userbuf.setAddressOfNextRawChar(tmp);
947 return true;
948 }
950 bool
951 TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp)
952 {
953 const KeywordInfo *kw = FindKeyword(s, length);
954 if (!kw)
955 return true;
957 if (kw->tokentype == TOK_RESERVED)
958 return reportError(JSMSG_RESERVED_ID, kw->chars);
960 if (kw->tokentype != TOK_STRICT_RESERVED) {
961 if (kw->version <= versionNumber()) {
962 // Working keyword.
963 if (ttp) {
964 *ttp = kw->tokentype;
965 return true;
966 }
967 return reportError(JSMSG_RESERVED_ID, kw->chars);
968 }
970 // The keyword is not in this version. Treat it as an identifier, unless
971 // it is let which we treat as TOK_STRICT_RESERVED by falling through to
972 // the code below (ES5 forbids it in strict mode).
973 if (kw->tokentype != TOK_LET)
974 return true;
975 }
977 // Strict reserved word.
978 return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
979 }
981 enum FirstCharKind {
982 // A jschar has the 'OneChar' kind if it, by itself, constitutes a valid
983 // token that cannot also be a prefix of a longer token. E.g. ';' has the
984 // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
985 // that begin with '+'.
986 //
987 // The few token kinds satisfying these properties cover roughly 35--45%
988 // of the tokens seen in practice.
989 //
990 // We represent the 'OneChar' kind with any positive value less than
991 // TOK_LIMIT. This representation lets us associate each one-char token
992 // jschar with a TokenKind and thus avoid a subsequent jschar-to-TokenKind
993 // conversion.
994 OneChar_Min = 0,
995 OneChar_Max = TOK_LIMIT - 1,
997 Space = TOK_LIMIT,
998 Ident,
999 Dec,
1000 String,
1001 EOL,
1002 BasePrefix,
1003 Other,
1005 LastCharKind = Other
1006 };
// firstCharKinds[] classifies each ASCII code (0..127) by the kind of token it
// can start. Entries are either a TokenKind (a complete one-char token) or one
// of the FirstCharKind values at or above TOK_LIMIT. getTokenInternal() indexes
// this table with the first char of a token after ruling out chars >= 128.
//
1008 // OneChar: 40, 41, 44, 58, 59, 63, 91, 93, 123, 125, 126:
1009 // '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
1010 // Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
1011 // Other: 46: '.' (no dedicated Dot kind; the table entry is Other)
1012 // Other: 61: '=' (no dedicated Equals kind; the table entry is Other)
1013 // String: 34, 39: '"', '\''
1014 // Dec: 49..57: '1'..'9'
1015 // Other: 43: '+' (no dedicated Plus kind; the table entry is Other)
1016 // BasePrefix: 48: '0'
1017 // Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
1018 // EOL: 10, 13: '\n', '\r'
1019 //
// Every code not listed above is Other as well (spelled _______ in the table).
// The T_* and _______ macros exist only to keep the table columns aligned and
// are undefined again right after the table.
1020 #define T_COMMA TOK_COMMA
1021 #define T_COLON TOK_COLON
1022 #define T_BITNOT TOK_BITNOT
1023 #define _______ Other
1024 static const uint8_t firstCharKinds[] = {
1025 /* 0 1 2 3 4 5 6 7 8 9 */
1026 /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
1027 /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
1028 /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
1029 /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
1030 /* 40+ */ TOK_LP, TOK_RP, _______, _______, T_COMMA,_______, _______, _______,BasePrefix, Dec,
1031 /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON,TOK_SEMI,
1032 /* 60+ */ _______, _______, _______,TOK_HOOK, _______, Ident, Ident, Ident, Ident, Ident,
1033 /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1034 /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1035 /* 90+ */ Ident, TOK_LB, _______, TOK_RB, _______, Ident, _______, Ident, Ident, Ident,
1036 /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1037 /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
1038 /* 120+ */ Ident, Ident, Ident, TOK_LC, _______, TOK_RC,T_BITNOT, _______
1039 };
1040 #undef T_COMMA
1041 #undef T_COLON
1042 #undef T_BITNOT
1043 #undef _______
// Guard against FirstCharKind growing past what one table element can hold.
1045 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
1046 "Elements of firstCharKinds[] are too small");
// Scans the next token from userbuf, fills in the Token obtained from
// newToken(), and returns its kind.
//
// |modifier| changes scanning in two ways visible here: KeywordIsName skips the
// keyword check for identifiers, and Operand lets a '/' start a regular
// expression literal instead of a division operator.
//
// Whitespace, line terminators and comments produce no token; the scanner jumps
// back to |retry| after consuming them. Every successful path ends at |out| and
// every failing path at |error|, which sets the token type to TOK_ERROR, calls
// onError() and returns TOK_ERROR.
1048 TokenKind
1049 TokenStream::getTokenInternal(Modifier modifier)
1050 {
// All locals are declared up front because the body jumps between sections
// with goto (|identifier|, |decimal|, |decimal_dot|, |skipline|, |badchar|).
1051 int c, qc;
1052 Token *tp;
1053 FirstCharKind c1kind;
1054 const jschar *numStart;
1055 bool hasExp;
1056 DecimalPoint decimalPoint;
1057 const jschar *identStart;
1058 bool hadUnicodeEscape;
1060 retry:
// Out of input: produce a TOK_EOF token and remember that EOF was reached.
1061 if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
1062 tp = newToken(0);
1063 tp->type = TOK_EOF;
1064 flags.isEOF = true;
1065 goto out;
1066 }
1068 c = userbuf.getRawChar();
1069 JS_ASSERT(c != EOF);
1071 // Chars not in the range 0..127 are rare. Getting them out of the way
1072 // early allows subsequent checking to be faster.
1073 if (MOZ_UNLIKELY(c >= 128)) {
1074 if (IsSpaceOrBOM2(c)) {
// The Unicode line/paragraph separators also count as line terminators.
1075 if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
1076 updateLineInfoForEOL();
1077 updateFlagsForEOL();
1078 }
1080 goto retry;
1081 }
// NOTE(review): the -1 presumably places the token start one char back,
// because the first char has already been consumed -- confirm in newToken().
1083 tp = newToken(-1);
1085 // '$' and '_' don't pass IsLetter, but they're < 128 so never appear here.
1086 JS_STATIC_ASSERT('$' < 128 && '_' < 128);
1087 if (IsLetter(c)) {
1088 identStart = userbuf.addressOfNextRawChar() - 1;
1089 hadUnicodeEscape = false;
1090 goto identifier;
1091 }
1093 goto badchar;
1094 }
1096 // Get the token kind, based on the first char. The ordering of c1kind
1097 // comparison is based on the frequency of tokens in real code -- Parsemark
1098 // (which represents typical JS code on the web) and the Unreal demo (which
1099 // represents asm.js code).
1100 //
1101 // Parsemark Unreal
1102 // OneChar 32.9% 39.7%
1103 // Space 25.0% 0.6%
1104 // Ident 19.2% 36.4%
1105 // Dec 7.2% 5.1%
1106 // String 7.9% 0.0%
1107 // EOL 1.7% 0.0%
1108 // BasePrefix 0.4% 4.9%
1109 // Other 5.7% 13.3%
1110 //
1111 // The ordering is based mostly on Parsemark frequencies, with Unreal
1112 // frequencies used to break close categories (e.g. |Dec| and |String|).
1113 // |Other| is biggish, but no other token kind is common enough for it to
1114 // be worth adding extra values to FirstCharKind.
1115 //
1116 c1kind = FirstCharKind(firstCharKinds[c]);
1118 // Look for an unambiguous single-char token.
1119 //
// For these chars the table entry is itself the TokenKind, so no further
// mapping is needed.
1120 if (c1kind <= OneChar_Max) {
1121 tp = newToken(-1);
1122 tp->type = TokenKind(c1kind);
1123 goto out;
1124 }
1126 // Skip over non-EOL whitespace chars.
1127 //
1128 if (c1kind == Space)
1129 goto retry;
1131 // Look for an identifier.
1132 //
1133 if (c1kind == Ident) {
1134 tp = newToken(-1);
1135 identStart = userbuf.addressOfNextRawChar() - 1;
1136 hadUnicodeEscape = false;
// Also entered by goto from the non-ASCII letter path above and from the
// '\\' case below; both set tp, identStart and hadUnicodeEscape first.
1138 identifier:
1139 for (;;) {
1140 c = getCharIgnoreEOL();
1141 if (c == EOF)
1142 break;
1143 if (!IsIdentifierPart(c)) {
// A non-identifier char ends the identifier unless it begins a valid
// Unicode escape, which is consumed and noted.
1144 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
1145 break;
1146 hadUnicodeEscape = true;
1147 }
1148 }
// Put back the char that terminated the identifier.
1149 ungetCharIgnoreEOL(c);
1151 // Identifiers containing no Unicode escapes can be processed directly
1152 // from userbuf. The rest must use the escapes converted via tokenbuf
1153 // before atomizing.
1154 const jschar *chars;
1155 size_t length;
1156 if (hadUnicodeEscape) {
1157 if (!putIdentInTokenbuf(identStart))
1158 goto error;
1160 chars = tokenbuf.begin();
1161 length = tokenbuf.length();
1162 } else {
1163 chars = identStart;
1164 length = userbuf.addressOfNextRawChar() - identStart;
1165 }
1167 // Check for keywords unless the parser told us not to.
1168 if (modifier != KeywordIsName) {
1169 tp->type = TOK_NAME;
1170 if (!checkForKeyword(chars, length, &tp->type))
1171 goto error;
// checkForKeyword() changed the type: this is a keyword, so no atom is needed.
1172 if (tp->type != TOK_NAME)
1173 goto out;
1174 }
1176 JSAtom *atom = AtomizeChars(cx, chars, length);
1177 if (!atom)
1178 goto error;
1179 tp->type = TOK_NAME;
1180 tp->setName(atom->asPropertyName());
1181 goto out;
1182 }
1184 // Look for a decimal number.
1185 //
1186 if (c1kind == Dec) {
1187 tp = newToken(-1);
1188 numStart = userbuf.addressOfNextRawChar() - 1;
// Also entered by goto from the BasePrefix section ("08"/"09" and a '0' that
// is not followed by a radix prefix or digit), with tp and numStart set.
1190 decimal:
1191 decimalPoint = NoDecimal;
1192 hasExp = false;
1193 while (JS7_ISDEC(c))
1194 c = getCharIgnoreEOL();
1196 if (c == '.') {
1197 decimalPoint = HasDecimal;
// Also entered by goto from the '.' case below for literals such as ".5";
// that path sets numStart, decimalPoint and hasExp itself.
1198 decimal_dot:
1199 do {
1200 c = getCharIgnoreEOL();
1201 } while (JS7_ISDEC(c));
1202 }
1203 if (c == 'e' || c == 'E') {
1204 hasExp = true;
1205 c = getCharIgnoreEOL();
1206 if (c == '+' || c == '-')
1207 c = getCharIgnoreEOL();
// An exponent marker must be followed by at least one digit.
1208 if (!JS7_ISDEC(c)) {
1209 ungetCharIgnoreEOL(c);
1210 reportError(JSMSG_MISSING_EXPONENT);
1211 goto error;
1212 }
1213 do {
1214 c = getCharIgnoreEOL();
1215 } while (JS7_ISDEC(c));
1216 }
1217 ungetCharIgnoreEOL(c);
// A number may not be immediately followed by an identifier start char.
1219 if (c != EOF && IsIdentifierStart(c)) {
1220 reportError(JSMSG_IDSTART_AFTER_NUMBER);
1221 goto error;
1222 }
1224 // Unlike identifiers and strings, numbers cannot contain escaped
1225 // chars, so we don't need to use tokenbuf. Instead we can just
1226 // convert the jschars in userbuf directly to the numeric value.
1227 double dval;
// Plain integers use GetDecimalInteger(); anything with a fraction or an
// exponent goes through js_strtod().
1228 if (!((decimalPoint == HasDecimal) || hasExp)) {
1229 if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
1230 goto error;
1231 } else {
1232 const jschar *dummy;
1233 if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
1234 goto error;
1235 }
1236 tp->type = TOK_NUMBER;
1237 tp->setNumber(dval, decimalPoint);
1238 goto out;
1239 }
1241 // Look for a string.
1242 //
1243 if (c1kind == String) {
1244 tp = newToken(-1);
// qc remembers the opening quote char; the string ends at the next
// unescaped occurrence of it.
1245 qc = c;
1246 tokenbuf.clear();
1247 while (true) {
1248 // We need to detect any of these chars: " or ', \n (or its
1249 // equivalents), \\, EOF. We use maybeStrSpecial[] in a manner
1250 // similar to maybeEOL[], see above. Because we detect EOL
1251 // sequences here and put them back immediately, we can use
1252 // getCharIgnoreEOL().
1253 c = getCharIgnoreEOL();
1254 if (maybeStrSpecial[c & 0xff]) {
1255 if (c == qc)
1256 break;
1257 if (c == '\\') {
// Decode the escape sequence; on leaving the switch, c holds the char
// to append to tokenbuf.
1258 switch (c = getChar()) {
1259 case 'b': c = '\b'; break;
1260 case 'f': c = '\f'; break;
1261 case 'n': c = '\n'; break;
1262 case 'r': c = '\r'; break;
1263 case 't': c = '\t'; break;
1264 case 'v': c = '\v'; break;
1266 default:
// Octal escape: up to three octal digits, with the value capped at 0377.
1267 if ('0' <= c && c < '8') {
1268 int32_t val = JS7_UNDEC(c);
1270 c = peekChar();
1271 // Strict mode code allows only \0, then a non-digit.
1272 if (val != 0 || JS7_ISDEC(c)) {
1273 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
1274 goto error;
1275 flags.sawOctalEscape = true;
1276 }
1277 if ('0' <= c && c < '8') {
1278 val = 8 * val + JS7_UNDEC(c);
1279 getChar();
1280 c = peekChar();
1281 if ('0' <= c && c < '8') {
// Take the third digit only if the result still fits in 0377;
// otherwise leave it in the input and keep the two-digit value.
1282 int32_t save = val;
1283 val = 8 * val + JS7_UNDEC(c);
1284 if (val <= 0377)
1285 getChar();
1286 else
1287 val = save;
1288 }
1289 }
1291 c = jschar(val);
1292 } else if (c == 'u') {
// \uXXXX: exactly four hex digits are required.
1293 jschar cp[4];
1294 if (peekChars(4, cp) &&
1295 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1296 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1297 c = (((((JS7_UNHEX(cp[0]) << 4)
1298 + JS7_UNHEX(cp[1])) << 4)
1299 + JS7_UNHEX(cp[2])) << 4)
1300 + JS7_UNHEX(cp[3]);
1301 skipChars(4);
1302 } else {
1303 reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
1304 goto error;
1305 }
1306 } else if (c == 'x') {
// \xXX: exactly two hex digits are required.
1307 jschar cp[2];
1308 if (peekChars(2, cp) &&
1309 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1310 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1311 skipChars(2);
1312 } else {
1313 reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
1314 goto error;
1315 }
1316 } else if (c == '\n') {
1317 // ES5 7.8.4: an escaped line terminator represents
1318 // no character.
1319 continue;
1320 }
// Any other escaped char stands for itself.
1321 break;
1322 }
1323 } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
// An unescaped line terminator or EOF inside the literal is an error.
1324 ungetCharIgnoreEOL(c);
1325 reportError(JSMSG_UNTERMINATED_STRING);
1326 goto error;
1327 }
1328 }
1329 if (!tokenbuf.append(c))
1330 goto error;
1331 }
1332 JSAtom *atom = atomize(cx, tokenbuf);
1333 if (!atom)
1334 goto error;
1335 tp->type = TOK_STRING;
1336 tp->setAtom(atom);
1337 goto out;
1338 }
1340 // Skip over EOL chars, updating line state along the way.
1341 //
1342 if (c1kind == EOL) {
1343 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
1344 if (c == '\r' && userbuf.hasRawChars())
1345 userbuf.matchRawChar('\n');
1346 updateLineInfoForEOL();
1347 updateFlagsForEOL();
1348 goto retry;
1349 }
1351 // Look for a hexadecimal, octal, or binary number.
1352 //
1353 if (c1kind == BasePrefix) {
1354 tp = newToken(-1);
1355 int radix;
1356 c = getCharIgnoreEOL();
1357 if (c == 'x' || c == 'X') {
1358 radix = 16;
1359 c = getCharIgnoreEOL();
1360 if (!JS7_ISHEX(c)) {
1361 ungetCharIgnoreEOL(c);
1362 reportError(JSMSG_MISSING_HEXDIGITS);
1363 goto error;
1364 }
1365 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0x'
1366 while (JS7_ISHEX(c))
1367 c = getCharIgnoreEOL();
1368 } else if (c == 'b' || c == 'B') {
1369 radix = 2;
1370 c = getCharIgnoreEOL();
1371 if (c != '0' && c != '1') {
1372 ungetCharIgnoreEOL(c);
1373 reportError(JSMSG_MISSING_BINARY_DIGITS);
1374 goto error;
1375 }
1376 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0b'
1377 while (c == '0' || c == '1')
1378 c = getCharIgnoreEOL();
1379 } else if (c == 'o' || c == 'O') {
1380 radix = 8;
1381 c = getCharIgnoreEOL();
1382 if (c < '0' || c > '7') {
1383 ungetCharIgnoreEOL(c);
1384 reportError(JSMSG_MISSING_OCTAL_DIGITS);
1385 goto error;
1386 }
1387 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0o'
1388 while ('0' <= c && c <= '7')
1389 c = getCharIgnoreEOL();
1390 } else if (JS7_ISDEC(c)) {
// Legacy octal literal: a leading '0' directly followed by digits.
1391 radix = 8;
1392 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0'
1393 while (JS7_ISDEC(c)) {
1394 // Octal integer literals are not permitted in strict mode code.
1395 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
1396 goto error;
1398 // Outside strict mode, we permit 08 and 09 as decimal numbers,
1399 // which makes our behaviour a superset of the ECMA numeric
1400 // grammar. We might not always be so permissive, so we warn
1401 // about it.
1402 if (c >= '8') {
1403 if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1404 goto error;
1405 }
1406 goto decimal; // use the decimal scanner for the rest of the number
1407 }
1408 c = getCharIgnoreEOL();
1409 }
1410 } else {
1411 // '0' not followed by a radix prefix ('x', 'b' or 'o', in either case) or a digit; scan as a decimal number.
1412 numStart = userbuf.addressOfNextRawChar() - 1;
1413 goto decimal;
1414 }
1415 ungetCharIgnoreEOL(c);
1417 if (c != EOF && IsIdentifierStart(c)) {
1418 reportError(JSMSG_IDSTART_AFTER_NUMBER);
1419 goto error;
1420 }
1422 double dval;
1423 const jschar *dummy;
1424 if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
1425 goto error;
1426 tp->type = TOK_NUMBER;
1427 tp->setNumber(dval, NoDecimal);
1428 goto out;
1429 }
1431 // This handles everything else.
1432 //
1433 JS_ASSERT(c1kind == Other);
1434 tp = newToken(-1);
1435 switch (c) {
1436 case '.':
1437 c = getCharIgnoreEOL();
// ".5"-style literal: numStart points back at the '.' (two chars were read).
1438 if (JS7_ISDEC(c)) {
1439 numStart = userbuf.addressOfNextRawChar() - 2;
1440 decimalPoint = HasDecimal;
1441 hasExp = false;
1442 goto decimal_dot;
1443 }
// "..." is a single token. With only two dots, the second one is put back
// below and a lone TOK_DOT is returned.
1444 if (c == '.') {
1445 if (matchChar('.')) {
1446 tp->type = TOK_TRIPLEDOT;
1447 goto out;
1448 }
1449 }
1450 ungetCharIgnoreEOL(c);
1451 tp->type = TOK_DOT;
1452 goto out;
1454 case '=':
1455 if (matchChar('='))
1456 tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
1457 else if (matchChar('>'))
1458 tp->type = TOK_ARROW;
1459 else
1460 tp->type = TOK_ASSIGN;
1461 goto out;
1463 case '+':
1464 if (matchChar('+'))
1465 tp->type = TOK_INC;
1466 else
1467 tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
1468 goto out;
1470 case '\\':
// A backslash is only legal here as the start of a Unicode-escaped
// identifier. The -6 steps identStart back over the six chars of the
// escape ("\uXXXX") that were just consumed.
1471 hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
1472 if (hadUnicodeEscape) {
1473 identStart = userbuf.addressOfNextRawChar() - 6;
1474 goto identifier;
1475 }
1476 goto badchar;
1478 case '|':
1479 if (matchChar('|'))
1480 tp->type = TOK_OR;
1481 else
1482 tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
1483 goto out;
1485 case '^':
1486 tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
1487 goto out;
1489 case '&':
1490 if (matchChar('&'))
1491 tp->type = TOK_AND;
1492 else
1493 tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
1494 goto out;
1496 case '!':
1497 if (matchChar('='))
1498 tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
1499 else
1500 tp->type = TOK_NOT;
1501 goto out;
1503 case '<':
1504 // NB: treat HTML begin-comment as comment-till-end-of-line.
// If only a prefix of "<!--" matches, the consumed chars are put back in
// reverse order so that '<' is scanned as an ordinary operator.
1505 if (matchChar('!')) {
1506 if (matchChar('-')) {
1507 if (matchChar('-'))
1508 goto skipline;
1509 ungetChar('-');
1510 }
1511 ungetChar('!');
1512 }
1513 if (matchChar('<')) {
1514 tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
1515 } else {
1516 tp->type = matchChar('=') ? TOK_LE : TOK_LT;
1517 }
1518 goto out;
1520 case '>':
1521 if (matchChar('>')) {
1522 if (matchChar('>'))
1523 tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
1524 else
1525 tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
1526 } else {
1527 tp->type = matchChar('=') ? TOK_GE : TOK_GT;
1528 }
1529 goto out;
1531 case '*':
1532 tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
1533 goto out;
1535 case '/':
1536 // Look for a single-line comment.
1537 if (matchChar('/')) {
1538 c = peekChar();
// "//@" and "//#" may introduce a directive; only the '@' form asks
// getDirectives() to warn.
1539 if (c == '@' || c == '#') {
1540 bool shouldWarn = getChar() == '@';
1541 if (!getDirectives(false, shouldWarn))
1542 goto error;
1543 }
// Also entered by goto from the "<!--" and "-->" handling: discard the
// rest of the line, leaving the '\n' (or EOF) for the next scan.
1545 skipline:
1546 while ((c = getChar()) != EOF && c != '\n')
1547 continue;
1548 ungetChar(c);
// NOTE(review): presumably gives back the token slot taken by newToken(-1)
// above, since a comment yields no token -- confirm against newToken().
1549 cursor = (cursor - 1) & ntokensMask;
1550 goto retry;
1551 }
1553 // Look for a multi-line comment.
1554 if (matchChar('*')) {
1555 unsigned linenoBefore = lineno;
1556 while ((c = getChar()) != EOF &&
1557 !(c == '*' && matchChar('/'))) {
1558 if (c == '@' || c == '#') {
1559 bool shouldWarn = c == '@';
1560 if (!getDirectives(true, shouldWarn))
1561 goto error;
1562 }
1563 }
1564 if (c == EOF) {
1565 reportError(JSMSG_UNTERMINATED_COMMENT);
1566 goto error;
1567 }
// A comment that spanned at least one line break counts as an EOL for
// the flags.
1568 if (linenoBefore != lineno)
1569 updateFlagsForEOL();
1570 cursor = (cursor - 1) & ntokensMask;
1571 goto retry;
1572 }
1574 // Look for a regexp.
1575 if (modifier == Operand) {
1576 tokenbuf.clear();
1578 bool inCharClass = false;
1579 for (;;) {
1580 c = getChar();
1581 if (c == '\\') {
// Keep the backslash and take the next char verbatim, so an escaped
// '/', '[' or ']' has no structural meaning.
1582 if (!tokenbuf.append(c))
1583 goto error;
1584 c = getChar();
1585 } else if (c == '[') {
1586 inCharClass = true;
1587 } else if (c == ']') {
1588 inCharClass = false;
1589 } else if (c == '/' && !inCharClass) {
1590 // For compat with IE, allow unescaped / in char classes.
1591 break;
1592 }
1593 if (c == '\n' || c == EOF) {
1594 ungetChar(c);
1595 reportError(JSMSG_UNTERMINATED_REGEXP);
1596 goto error;
1597 }
1598 if (!tokenbuf.append(c))
1599 goto error;
1600 }
// Collect the flags that follow the closing '/'. Each of g, i, m and y is
// accepted once; a repeated flag ends the loop and is then rejected by the
// letter check below.
1602 RegExpFlag reflags = NoFlags;
1603 unsigned length = tokenbuf.length() + 1;
1604 while (true) {
1605 c = peekChar();
1606 if (c == 'g' && !(reflags & GlobalFlag))
1607 reflags = RegExpFlag(reflags | GlobalFlag);
1608 else if (c == 'i' && !(reflags & IgnoreCaseFlag))
1609 reflags = RegExpFlag(reflags | IgnoreCaseFlag);
1610 else if (c == 'm' && !(reflags & MultilineFlag))
1611 reflags = RegExpFlag(reflags | MultilineFlag);
1612 else if (c == 'y' && !(reflags & StickyFlag))
1613 reflags = RegExpFlag(reflags | StickyFlag);
1614 else
1615 break;
1616 getChar();
1617 length++;
1618 }
1620 c = peekChar();
1621 if (JS7_ISLET(c)) {
1622 char buf[2] = { '\0', '\0' };
// NOTE(review): presumably moves the reported position forward to the
// offending flag char -- confirm how reportError() uses pos.begin.
1623 tp->pos.begin += length + 1;
1624 buf[0] = char(c);
1625 reportError(JSMSG_BAD_REGEXP_FLAG, buf);
1626 (void) getChar();
1627 goto error;
1628 }
1629 tp->type = TOK_REGEXP;
1630 tp->setRegExpFlags(reflags);
1631 goto out;
1632 }
1634 tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
1635 goto out;
1637 case '%':
1638 tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
1639 goto out;
1641 case '-':
1642 if (matchChar('-')) {
// "-->" is skipped to end of line like "<!--", but only while
// flags.isDirtyLine is clear.
1643 if (peekChar() == '>' && !flags.isDirtyLine)
1644 goto skipline;
1645 tp->type = TOK_DEC;
1646 } else {
1647 tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
1648 }
1649 goto out;
// Also entered by goto for an illegal non-ASCII char and for a backslash
// that does not start a valid escaped identifier.
1651 badchar:
1652 default:
1653 reportError(JSMSG_ILLEGAL_CHARACTER);
1654 goto error;
1655 }
1657 MOZ_ASSUME_UNREACHABLE("should have jumped to |out| or |error|");
// Success exit: mark the line dirty and record where the token ends as an
// offset from the start of userbuf.
1659 out:
1660 flags.isDirtyLine = true;
1661 tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
1662 JS_ASSERT(IsTokenSane(tp));
1663 return tp->type;
// Failure exit: same bookkeeping as |out|, but the token becomes TOK_ERROR
// and onError() is called.
1665 error:
1666 flags.isDirtyLine = true;
1667 tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
1668 tp->type = TOK_ERROR;
1669 JS_ASSERT(IsTokenSane(tp));
1670 onError();
1671 return TOK_ERROR;
1672 }
// Records that this TokenStream has hit an error by setting flags.hadError.
// In DEBUG builds it additionally poisons userbuf (see below). Called from the
// |error| exit of getTokenInternal().
1674 void
1675 TokenStream::onError()
1676 {
1677 flags.hadError = true;
1678 #ifdef DEBUG
1679 // Poisoning userbuf on error establishes an invariant: once an erroneous
1680 // token has been seen, userbuf will not be consulted again. This is true
1681 // because the parser will either (a) deal with the TOK_ERROR token by
1682 // aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
1683 // match what it expected, it will unget the token, and the next getToken()
1684 // call will immediately return the just-gotten TOK_ERROR token again
1685 // without consulting userbuf, thanks to the lookahead buffer.
1686 userbuf.poison();
1687 #endif
1688 }
1690 JS_FRIEND_API(int)
1691 js_fgets(char *buf, int size, FILE *file)
1692 {
1693 int n, i, c;
1694 bool crflag;
1696 n = size - 1;
1697 if (n < 0)
1698 return -1;
1700 crflag = false;
1701 for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
1702 buf[i] = c;
1703 if (c == '\n') { // any \n ends a line
1704 i++; // keep the \n; we know there is room for \0
1705 break;
1706 }
1707 if (crflag) { // \r not followed by \n ends line at the \r
1708 ungetc(c, file);
1709 break; // and overwrite c in buf with \0
1710 }
1711 crflag = (c == '\r');
1712 }
1714 buf[i] = '\0';
1715 return i;
1716 }
#ifdef DEBUG
// Debug-only helper: returns the enumerator name of |tt| as a C string, e.g.
// "TOK_SEMI" for TOK_SEMI. TOK_LIMIT and any value without a case below yield
// "<bad TokenKind>".
const char *
TokenKindToString(TokenKind tt)
{
    // Expands to |case TOK_X: return "TOK_X";| so that each name is spelled
    // only once. The switch deliberately has no default label.
#define TOKEN_KIND_NAME_CASE(kind) case kind: return #kind;
    switch (tt) {
      TOKEN_KIND_NAME_CASE(TOK_ERROR)
      TOKEN_KIND_NAME_CASE(TOK_EOF)
      TOKEN_KIND_NAME_CASE(TOK_EOL)
      TOKEN_KIND_NAME_CASE(TOK_SEMI)
      TOKEN_KIND_NAME_CASE(TOK_COMMA)
      TOKEN_KIND_NAME_CASE(TOK_HOOK)
      TOKEN_KIND_NAME_CASE(TOK_COLON)
      TOKEN_KIND_NAME_CASE(TOK_OR)
      TOKEN_KIND_NAME_CASE(TOK_AND)
      TOKEN_KIND_NAME_CASE(TOK_BITOR)
      TOKEN_KIND_NAME_CASE(TOK_BITXOR)
      TOKEN_KIND_NAME_CASE(TOK_BITAND)
      TOKEN_KIND_NAME_CASE(TOK_ADD)
      TOKEN_KIND_NAME_CASE(TOK_SUB)
      TOKEN_KIND_NAME_CASE(TOK_MUL)
      TOKEN_KIND_NAME_CASE(TOK_DIV)
      TOKEN_KIND_NAME_CASE(TOK_MOD)
      TOKEN_KIND_NAME_CASE(TOK_INC)
      TOKEN_KIND_NAME_CASE(TOK_DEC)
      TOKEN_KIND_NAME_CASE(TOK_DOT)
      TOKEN_KIND_NAME_CASE(TOK_TRIPLEDOT)
      TOKEN_KIND_NAME_CASE(TOK_LB)
      TOKEN_KIND_NAME_CASE(TOK_RB)
      TOKEN_KIND_NAME_CASE(TOK_LC)
      TOKEN_KIND_NAME_CASE(TOK_RC)
      TOKEN_KIND_NAME_CASE(TOK_LP)
      TOKEN_KIND_NAME_CASE(TOK_RP)
      TOKEN_KIND_NAME_CASE(TOK_ARROW)
      TOKEN_KIND_NAME_CASE(TOK_NAME)
      TOKEN_KIND_NAME_CASE(TOK_NUMBER)
      TOKEN_KIND_NAME_CASE(TOK_STRING)
      TOKEN_KIND_NAME_CASE(TOK_REGEXP)
      TOKEN_KIND_NAME_CASE(TOK_TRUE)
      TOKEN_KIND_NAME_CASE(TOK_FALSE)
      TOKEN_KIND_NAME_CASE(TOK_NULL)
      TOKEN_KIND_NAME_CASE(TOK_THIS)
      TOKEN_KIND_NAME_CASE(TOK_FUNCTION)
      TOKEN_KIND_NAME_CASE(TOK_IF)
      TOKEN_KIND_NAME_CASE(TOK_ELSE)
      TOKEN_KIND_NAME_CASE(TOK_SWITCH)
      TOKEN_KIND_NAME_CASE(TOK_CASE)
      TOKEN_KIND_NAME_CASE(TOK_DEFAULT)
      TOKEN_KIND_NAME_CASE(TOK_WHILE)
      TOKEN_KIND_NAME_CASE(TOK_DO)
      TOKEN_KIND_NAME_CASE(TOK_FOR)
      TOKEN_KIND_NAME_CASE(TOK_BREAK)
      TOKEN_KIND_NAME_CASE(TOK_CONTINUE)
      TOKEN_KIND_NAME_CASE(TOK_IN)
      TOKEN_KIND_NAME_CASE(TOK_VAR)
      TOKEN_KIND_NAME_CASE(TOK_CONST)
      TOKEN_KIND_NAME_CASE(TOK_WITH)
      TOKEN_KIND_NAME_CASE(TOK_RETURN)
      TOKEN_KIND_NAME_CASE(TOK_NEW)
      TOKEN_KIND_NAME_CASE(TOK_DELETE)
      TOKEN_KIND_NAME_CASE(TOK_TRY)
      TOKEN_KIND_NAME_CASE(TOK_CATCH)
      TOKEN_KIND_NAME_CASE(TOK_FINALLY)
      TOKEN_KIND_NAME_CASE(TOK_THROW)
      TOKEN_KIND_NAME_CASE(TOK_INSTANCEOF)
      TOKEN_KIND_NAME_CASE(TOK_DEBUGGER)
      TOKEN_KIND_NAME_CASE(TOK_YIELD)
      TOKEN_KIND_NAME_CASE(TOK_LET)
      TOKEN_KIND_NAME_CASE(TOK_RESERVED)
      TOKEN_KIND_NAME_CASE(TOK_STRICT_RESERVED)
      TOKEN_KIND_NAME_CASE(TOK_STRICTEQ)
      TOKEN_KIND_NAME_CASE(TOK_EQ)
      TOKEN_KIND_NAME_CASE(TOK_STRICTNE)
      TOKEN_KIND_NAME_CASE(TOK_NE)
      TOKEN_KIND_NAME_CASE(TOK_TYPEOF)
      TOKEN_KIND_NAME_CASE(TOK_VOID)
      TOKEN_KIND_NAME_CASE(TOK_NOT)
      TOKEN_KIND_NAME_CASE(TOK_BITNOT)
      TOKEN_KIND_NAME_CASE(TOK_LT)
      TOKEN_KIND_NAME_CASE(TOK_LE)
      TOKEN_KIND_NAME_CASE(TOK_GT)
      TOKEN_KIND_NAME_CASE(TOK_GE)
      TOKEN_KIND_NAME_CASE(TOK_LSH)
      TOKEN_KIND_NAME_CASE(TOK_RSH)
      TOKEN_KIND_NAME_CASE(TOK_URSH)
      TOKEN_KIND_NAME_CASE(TOK_ASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_ADDASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_SUBASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_BITORASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_BITXORASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_BITANDASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_LSHASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_RSHASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_URSHASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_MULASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_DIVASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_MODASSIGN)
      TOKEN_KIND_NAME_CASE(TOK_EXPORT)
      TOKEN_KIND_NAME_CASE(TOK_IMPORT)
      // TOK_LIMIT is a sentinel, not a real token; fall out to the error string.
      case TOK_LIMIT: break;
    }
#undef TOKEN_KIND_NAME_CASE

    return "<bad TokenKind>";
}
#endif