The Tor Browser: js/src/frontend/TokenStream.cpp@6474c204b198

Cloned from the upstream tor-browser origin at tor-browser-31.3.0esr-4.5-1-build1
(revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f) for hacking purposes.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-

     2  * vim: set ts=8 sts=4 et sw=4 tw=99:

     3  * This Source Code Form is subject to the terms of the Mozilla Public

     4  * License, v. 2.0. If a copy of the MPL was not distributed with this

     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     7 // JS lexical scanner.

     9 #include "frontend/TokenStream.h"

    11 #include "mozilla/PodOperations.h"

    13 #include <ctype.h>

    14 #include <stdarg.h>

    15 #include <stdio.h>

    16 #include <string.h>

    18 #include "jsatom.h"

    19 #include "jscntxt.h"

    20 #include "jsexn.h"

    21 #include "jsnum.h"

    22 #include "jsworkers.h"

    24 #include "frontend/BytecodeCompiler.h"

    25 #include "js/CharacterEncoding.h"

    26 #include "vm/Keywords.h"

    27 #include "vm/StringBuffer.h"

    29 using namespace js;

    30 using namespace js::frontend;

    31 using namespace js::unicode;

    33 using mozilla::Maybe;

    34 using mozilla::PodAssign;

    35 using mozilla::PodCopy;

    36 using mozilla::PodZero;

    38 struct KeywordInfo {

    39     const char  *chars;         // C string with keyword text

    40     TokenKind   tokentype;

    41     JSVersion   version;

    42 };

    44 static const KeywordInfo keywords[] = {

    45 #define KEYWORD_INFO(keyword, name, type, version) \

    46     {js_##keyword##_str, type, version},

    47     FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)

    48 #undef KEYWORD_INFO

    49 };

    51 // Returns a KeywordInfo for the specified characters, or nullptr if the string

    52 // is not a keyword.

    53 static const KeywordInfo *

    54 FindKeyword(const jschar *s, size_t length)

    55 {

    56     JS_ASSERT(length != 0);

    58     size_t i;

    59     const KeywordInfo *kw;

    60     const char *chars;

    62 #define JSKW_LENGTH()           length

    63 #define JSKW_AT(column)         s[column]

    64 #define JSKW_GOT_MATCH(index)   i = (index); goto got_match;

    65 #define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;

    66 #define JSKW_NO_MATCH()         goto no_match;

    67 #include "jsautokw.h"

    68 #undef JSKW_NO_MATCH

    69 #undef JSKW_TEST_GUESS

    70 #undef JSKW_GOT_MATCH

    71 #undef JSKW_AT

    72 #undef JSKW_LENGTH

    74   got_match:

    75     return &keywords[i];

    77   test_guess:

    78     kw = &keywords[i];

    79     chars = kw->chars;

    80     do {

    81         if (*s++ != (unsigned char)(*chars++))

    82             goto no_match;

    83     } while (--length != 0);

    84     return kw;

    86   no_match:

    87     return nullptr;

    88 }

    90 bool

    91 frontend::IsIdentifier(JSLinearString *str)

    92 {

    93     const jschar *chars = str->chars();

    94     size_t length = str->length();

    96     if (length == 0)

    97         return false;

    98     jschar c = *chars;

    99     if (!IsIdentifierStart(c))

   100         return false;

   101     const jschar *end = chars + length;

   102     while (++chars != end) {

   103         c = *chars;

   104         if (!IsIdentifierPart(c))

   105             return false;

   106     }

   107     return true;

   108 }

   110 bool

   111 frontend::IsKeyword(JSLinearString *str)

   112 {

   113     return FindKeyword(str->chars(), str->length()) != nullptr;

   114 }

   116 TokenStream::SourceCoords::SourceCoords(ExclusiveContext *cx, uint32_t ln)

   117   : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)

   118 {

   119     // This is actually necessary!  Removing it causes compile errors on

   120     // GCC and clang.  You could try declaring this:

   121     //

   122     //   const uint32_t TokenStream::SourceCoords::MAX_PTR;

   123     //

   124     // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.

   125     //

   126     uint32_t maxPtr = MAX_PTR;

   128     // The first line begins at buffer offset 0.  MAX_PTR is the sentinel.  The

   129     // appends cannot fail because |lineStartOffsets_| has statically-allocated

   130     // elements.

   131     JS_ASSERT(lineStartOffsets_.capacity() >= 2);

   132     (void)lineStartOffsets_.reserve(2);

   133     lineStartOffsets_.infallibleAppend(0);

   134     lineStartOffsets_.infallibleAppend(maxPtr);

   135 }

   137 MOZ_ALWAYS_INLINE void

   138 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)

   139 {

   140     uint32_t lineIndex = lineNumToIndex(lineNum);

   141     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;

   143     JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);

   145     if (lineIndex == sentinelIndex) {

   146         // We haven't seen this newline before.  Update lineStartOffsets_.

   147         // We ignore any failures due to OOM -- because we always have a

   148         // sentinel node, it'll just be like the newline wasn't present.  I.e.

   149         // the line numbers will be wrong, but the code won't crash or anything

   150         // like that.

   151         lineStartOffsets_[lineIndex] = lineStartOffset;

   153         uint32_t maxPtr = MAX_PTR;

   154         (void)lineStartOffsets_.append(maxPtr);

   156     } else {

   157         // We have seen this newline before (and ungot it).  Do nothing (other

   158         // than checking it hasn't mysteriously changed).

   159         JS_ASSERT(lineStartOffsets_[lineIndex] == lineStartOffset);

   160     }

   161 }

   163 MOZ_ALWAYS_INLINE bool

   164 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other)

   165 {

   166     JS_ASSERT(lineStartOffsets_.back() == MAX_PTR);

   167     JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);

   169     if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())

   170         return true;

   172     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;

   173     lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];

   175     for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {

   176         if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))

   177             return false;

   178     }

   179     return true;

   180 }

   182 MOZ_ALWAYS_INLINE uint32_t

   183 TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const

   184 {

   185     uint32_t iMin, iMax, iMid;

   187     if (lineStartOffsets_[lastLineIndex_] <= offset) {

   188         // If we reach here, offset is on a line the same as or higher than

   189         // last time.  Check first for the +0, +1, +2 cases, because they

   190         // typically cover 85--98% of cases.

   191         if (offset < lineStartOffsets_[lastLineIndex_ + 1])

   192             return lastLineIndex_;      // lineIndex is same as last time

   194         // If we reach here, there must be at least one more entry (plus the

   195         // sentinel).  Try it.

   196         lastLineIndex_++;

   197         if (offset < lineStartOffsets_[lastLineIndex_ + 1])

   198             return lastLineIndex_;      // lineIndex is one higher than last time

   200         // The same logic applies here.

   201         lastLineIndex_++;

   202         if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {

   203             return lastLineIndex_;      // lineIndex is two higher than last time

   204         }

   206         // No luck.  Oh well, we have a better-than-default starting point for

   207         // the binary search.

   208         iMin = lastLineIndex_ + 1;

   209         JS_ASSERT(iMin < lineStartOffsets_.length() - 1);   // -1 due to the sentinel

   211     } else {

   212         iMin = 0;

   213     }

   215     // This is a binary search with deferred detection of equality, which was

   216     // marginally faster in this case than a standard binary search.

   217     // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we

   218     // want one before that.

   219     iMax = lineStartOffsets_.length() - 2;

   220     while (iMax > iMin) {

   221         iMid = iMin + (iMax - iMin) / 2;

   222         if (offset >= lineStartOffsets_[iMid + 1])

   223             iMin = iMid + 1;    // offset is above lineStartOffsets_[iMid]

   224         else

   225             iMax = iMid;        // offset is below or within lineStartOffsets_[iMid]

   226     }

   227     JS_ASSERT(iMax == iMin);

   228     JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);

   229     lastLineIndex_ = iMin;

   230     return iMin;

   231 }

   233 uint32_t

   234 TokenStream::SourceCoords::lineNum(uint32_t offset) const

   235 {

   236     uint32_t lineIndex = lineIndexOf(offset);

   237     return lineIndexToNum(lineIndex);

   238 }

   240 uint32_t

   241 TokenStream::SourceCoords::columnIndex(uint32_t offset) const

   242 {

   243     uint32_t lineIndex = lineIndexOf(offset);

   244     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];

   245     JS_ASSERT(offset >= lineStartOffset);

   246     return offset - lineStartOffset;

   247 }

   249 void

   250 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum,

   251                                                  uint32_t *columnIndex) const

   252 {

   253     uint32_t lineIndex = lineIndexOf(offset);

   254     *lineNum = lineIndexToNum(lineIndex);

   255     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];

   256     JS_ASSERT(offset >= lineStartOffset);

   257     *columnIndex = offset - lineStartOffset;

   258 }

   260 #ifdef _MSC_VER

   261 #pragma warning(push)

   262 #pragma warning(disable:4351)

   263 #endif

   265 // Initialize members that aren't initialized in |init|.

   266 TokenStream::TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,

   267                          const jschar *base, size_t length, StrictModeGetter *smg)

   268   : srcCoords(cx, options.lineno),

   269     options_(options),

   270     tokens(),

   271     cursor(),

   272     lookahead(),

   273     lineno(options.lineno),

   274     flags(),

   275     linebase(base - options.column),

   276     prevLinebase(nullptr),

   277     userbuf(cx, base - options.column, length + options.column), // See comment below

   278     filename(options.filename()),

   279     displayURL_(nullptr),

   280     sourceMapURL_(nullptr),

   281     tokenbuf(cx),

   282     cx(cx),

   283     originPrincipals(options.originPrincipals(cx)),

   284     strictModeGetter(smg)

   285 {

   286     // The caller must ensure that a reference is held on the supplied principals

   287     // throughout compilation.

   288     JS_ASSERT_IF(originPrincipals, originPrincipals->refcount > 0);

   290     // Column numbers are computed as offsets from the current line's base, so the

   291     // initial line's base must be included in the buffer. linebase and userbuf

   292     // were adjusted above, and if we are starting tokenization part way through

   293     // this line then adjust the next character.

   294     userbuf.setAddressOfNextRawChar(base);

   296     // Nb: the following tables could be static, but initializing them here is

   297     // much easier.  Don't worry, the time to initialize them for each

   298     // TokenStream is trivial.  See bug 639420.

   300     // See getChar() for an explanation of maybeEOL[].

   301     memset(maybeEOL, 0, sizeof(maybeEOL));

   302     maybeEOL[unsigned('\n')] = true;

   303     maybeEOL[unsigned('\r')] = true;

   304     maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;

   305     maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;

   307     // See getTokenInternal() for an explanation of maybeStrSpecial[].

   308     memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));

   309     maybeStrSpecial[unsigned('"')] = true;

   310     maybeStrSpecial[unsigned('\'')] = true;

   311     maybeStrSpecial[unsigned('\\')] = true;

   312     maybeStrSpecial[unsigned('\n')] = true;

   313     maybeStrSpecial[unsigned('\r')] = true;

   314     maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;

   315     maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;

   316     maybeStrSpecial[unsigned(EOF & 0xff)] = true;

   318     // See Parser::assignExpr() for an explanation of isExprEnding[].

   319     memset(isExprEnding, 0, sizeof(isExprEnding));

   320     isExprEnding[TOK_COMMA] = 1;

   321     isExprEnding[TOK_SEMI]  = 1;

   322     isExprEnding[TOK_COLON] = 1;

   323     isExprEnding[TOK_RP]    = 1;

   324     isExprEnding[TOK_RB]    = 1;

   325     isExprEnding[TOK_RC]    = 1;

   326 }

   328 #ifdef _MSC_VER

   329 #pragma warning(pop)

   330 #endif

   332 TokenStream::~TokenStream()

   333 {

   334     js_free(displayURL_);

   335     js_free(sourceMapURL_);

   337     JS_ASSERT_IF(originPrincipals, originPrincipals->refcount);

   338 }

   // Pick the fastest getc variant the configuration offers: getc_unlocked
   // first, then _getc_nolock, and plain getc as the fallback.
   #ifdef HAVE_GETC_UNLOCKED
   # define fast_getc getc_unlocked
   #else
   # ifdef HAVE__GETC_NOLOCK
   #  define fast_getc _getc_nolock
   # else
   #  define fast_getc getc
   # endif
   #endif

   349 MOZ_ALWAYS_INLINE void

   350 TokenStream::updateLineInfoForEOL()

   351 {

   352     prevLinebase = linebase;

   353     linebase = userbuf.addressOfNextRawChar();

   354     lineno++;

   355     srcCoords.add(lineno, linebase - userbuf.base());

   356 }

   358 MOZ_ALWAYS_INLINE void

   359 TokenStream::updateFlagsForEOL()

   360 {

   361     flags.isDirtyLine = false;

   362 }

   364 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.

   365 int32_t

   366 TokenStream::getChar()

   367 {

   368     int32_t c;

   369     if (MOZ_LIKELY(userbuf.hasRawChars())) {

   370         c = userbuf.getRawChar();

   372         // Normalize the jschar if it was a newline.  We need to detect any of

   373         // these four characters:  '\n' (0x000a), '\r' (0x000d),

   374         // LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029).  Testing for each

   375         // one in turn is slow, so we use a single probabilistic check, and if

   376         // that succeeds, test for them individually.

   377         //

   378         // We use the bottom 8 bits to index into a lookup table, succeeding

   379         // when d&0xff is 0xa, 0xd, 0x28 or 0x29.  Among ASCII chars (which

   380         // are by the far the most common) this gives false positives for '('

   381         // (0x0028) and ')' (0x0029).  We could avoid those by incorporating

   382         // the 13th bit of d into the lookup, but that requires extra shifting

   383         // and masking and isn't worthwhile.  See TokenStream::TokenStream()

   384         // for the initialization of the relevant entries in the table.

   385         if (MOZ_UNLIKELY(maybeEOL[c & 0xff])) {

   386             if (c == '\n')

   387                 goto eol;

   388             if (c == '\r') {

   389                 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.

   390                 if (userbuf.hasRawChars())

   391                     userbuf.matchRawChar('\n');

   392                 goto eol;

   393             }

   394             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)

   395                 goto eol;

   396         }

   397         return c;

   398     }

   400     flags.isEOF = true;

   401     return EOF;

   403   eol:

   404     updateLineInfoForEOL();

   405     return '\n';

   406 }

   408 // This gets the next char. It does nothing special with EOL sequences, not

   409 // even updating the line counters.  It can be used safely if (a) the

   410 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if

   411 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used

   412 // before it's ungotten.

   413 int32_t

   414 TokenStream::getCharIgnoreEOL()

   415 {

   416     if (MOZ_LIKELY(userbuf.hasRawChars()))

   417         return userbuf.getRawChar();

   419     flags.isEOF = true;

   420     return EOF;

   421 }

   423 void

   424 TokenStream::ungetChar(int32_t c)

   425 {

   426     if (c == EOF)

   427         return;

   428     JS_ASSERT(!userbuf.atStart());

   429     userbuf.ungetRawChar();

   430     if (c == '\n') {

   431 #ifdef DEBUG

   432         int32_t c2 = userbuf.peekRawChar();

   433         JS_ASSERT(TokenBuf::isRawEOLChar(c2));

   434 #endif

   436         // If it's a \r\n sequence, also unget the \r.

   437         if (!userbuf.atStart())

   438             userbuf.matchRawCharBackwards('\r');

   440         JS_ASSERT(prevLinebase);    // we should never get more than one EOL char

   441         linebase = prevLinebase;

   442         prevLinebase = nullptr;

   443         lineno--;

   444     } else {

   445         JS_ASSERT(userbuf.peekRawChar() == c);

   446     }

   447 }

   449 void

   450 TokenStream::ungetCharIgnoreEOL(int32_t c)

   451 {

   452     if (c == EOF)

   453         return;

   454     JS_ASSERT(!userbuf.atStart());

   455     userbuf.ungetRawChar();

   456 }

   458 // Return true iff |n| raw characters can be read from this without reading past

   459 // EOF or a newline, and copy those characters into |cp| if so.  The characters

   460 // are not consumed: use skipChars(n) to do so after checking that the consumed

   461 // characters had appropriate values.

   462 bool

   463 TokenStream::peekChars(int n, jschar *cp)

   464 {

   465     int i, j;

   466     int32_t c;

   468     for (i = 0; i < n; i++) {

   469         c = getCharIgnoreEOL();

   470         if (c == EOF)

   471             break;

   472         if (c == '\n') {

   473             ungetCharIgnoreEOL(c);

   474             break;

   475         }

   476         cp[i] = jschar(c);

   477     }

   478     for (j = i - 1; j >= 0; j--)

   479         ungetCharIgnoreEOL(cp[j]);

   480     return i == n;

   481 }

   483 const jschar *

   484 TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max)

   485 {

   486     JS_ASSERT(base_ <= p && p <= limit_);

   488     size_t n = 0;

   489     while (true) {

   490         if (p >= limit_)

   491             break;

   492         if (n >= max)

   493             break;

   494         if (TokenBuf::isRawEOLChar(*p++))

   495             break;

   496         n++;

   497     }

   498     return p;

   499 }

   501 void

   502 TokenStream::advance(size_t position)

   503 {

   504     const jschar *end = userbuf.base() + position;

   505     while (userbuf.addressOfNextRawChar() < end)

   506         getChar();

   508     Token *cur = &tokens[cursor];

   509     cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base();

   510     cur->type = TOK_ERROR;

   511     lookahead = 0;

   512 }

   514 void

   515 TokenStream::tell(Position *pos)

   516 {

   517     pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);

   518     pos->flags = flags;

   519     pos->lineno = lineno;

   520     pos->linebase = linebase;

   521     pos->prevLinebase = prevLinebase;

   522     pos->lookahead = lookahead;

   523     pos->currentToken = currentToken();

   524     for (unsigned i = 0; i < lookahead; i++)

   525         pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];

   526 }

   528 void

   529 TokenStream::seek(const Position &pos)

   530 {

   531     userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);

   532     flags = pos.flags;

   533     lineno = pos.lineno;

   534     linebase = pos.linebase;

   535     prevLinebase = pos.prevLinebase;

   536     lookahead = pos.lookahead;

   538     tokens[cursor] = pos.currentToken;

   539     for (unsigned i = 0; i < lookahead; i++)

   540         tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];

   541 }

   543 bool

   544 TokenStream::seek(const Position &pos, const TokenStream &other)

   545 {

   546     if (!srcCoords.fill(other.srcCoords))

   547         return false;

   548     seek(pos);

   549     return true;

   550 }

   552 bool

   553 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,

   554                                            va_list args)

   555 {

   556     // In strict mode code, this is an error, not merely a warning.

   557     unsigned flags = JSREPORT_STRICT;

   558     if (strictMode)

   559         flags |= JSREPORT_ERROR;

   560     else if (options().extraWarningsOption)

   561         flags |= JSREPORT_WARNING;

   562     else

   563         return true;

   565     return reportCompileErrorNumberVA(offset, flags, errorNumber, args);

   566 }

   568 void

   569 CompileError::throwError(JSContext *cx)

   570 {

   571     // If there's a runtime exception type associated with this error

   572     // number, set that as the pending exception.  For errors occuring at

   573     // compile time, this is very likely to be a JSEXN_SYNTAXERR.

   574     //

   575     // If an exception is thrown but not caught, the JSREPORT_EXCEPTION

   576     // flag will be set in report.flags.  Proper behavior for an error

   577     // reporter is to ignore a report with this flag for all but top-level

   578     // compilation errors.  The exception will remain pending, and so long

   579     // as the non-top-level "load", "eval", or "compile" native function

   580     // returns false, the top-level reporter will eventually receive the

   581     // uncaught exception report.

   582     if (!js_ErrorToException(cx, message, &report, nullptr, nullptr))

   583         CallErrorReporter(cx, message, &report);

   584 }

   586 CompileError::~CompileError()

   587 {

   588     js_free((void*)report.uclinebuf);

   589     js_free((void*)report.linebuf);

   590     js_free((void*)report.ucmessage);

   591     js_free(message);

   592     message = nullptr;

   594     if (report.messageArgs) {

   595         if (argumentsType == ArgumentsAreASCII) {

   596             unsigned i = 0;

   597             while (report.messageArgs[i])

   598                 js_free((void*)report.messageArgs[i++]);

   599         }

   600         js_free(report.messageArgs);

   601     }

   603     PodZero(&report);

   604 }

   606 bool

   607 TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,

   608                                         va_list args)

   609 {

   610     bool warning = JSREPORT_IS_WARNING(flags);

   612     if (warning && options().werrorOption) {

   613         flags &= ~JSREPORT_WARNING;

   614         warning = false;

   615     }

   617     // On the main thread, report the error immediately. When compiling off

   618     // thread, save the error so that the main thread can report it later.

   619     CompileError tempErr;

   620     CompileError &err = cx->isJSContext() ? tempErr : cx->addPendingCompileError();

   622     err.report.flags = flags;

   623     err.report.errorNumber = errorNumber;

   624     err.report.filename = filename;

   625     err.report.originPrincipals = originPrincipals;

   626     if (offset == NoOffset) {

   627         err.report.lineno = 0;

   628         err.report.column = 0;

   629     } else {

   630         err.report.lineno = srcCoords.lineNum(offset);

   631         err.report.column = srcCoords.columnIndex(offset);

   632     }

   634     err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;

   636     if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, nullptr, errorNumber, &err.message,

   637                                  &err.report, err.argumentsType, args))

   638     {

   639         return false;

   640     }

   642     // Given a token, T, that we want to complain about: if T's (starting)

   643     // lineno doesn't match TokenStream's lineno, that means we've scanned past

   644     // the line that T starts on, which makes it hard to print some or all of

   645     // T's (starting) line for context.

   646     //

   647     // So we don't even try, leaving report.linebuf and friends zeroed.  This

   648     // means that any error involving a multi-line token (e.g. an unterminated

   649     // multi-line string literal) won't have a context printed.

   650     if (offset != NoOffset && err.report.lineno == lineno) {

   651         const jschar *tokenStart = userbuf.base() + offset;

   653         // We show only a portion (a "window") of the line around the erroneous

   654         // token -- the first char in the token, plus |windowRadius| chars

   655         // before it and |windowRadius - 1| chars after it.  This is because

   656         // lines can be very long and printing the whole line is (a) not that

   657         // helpful, and (b) can waste a lot of memory.  See bug 634444.

   658         static const size_t windowRadius = 60;

   660         // Truncate at the front if necessary.

   661         const jschar *windowBase = (linebase + windowRadius < tokenStart)

   662                                  ? tokenStart - windowRadius

   663                                  : linebase;

   664         uint32_t windowOffset = tokenStart - windowBase;

   666         // Find EOL, or truncate at the back if necessary.

   667         const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius);

   668         size_t windowLength = windowLimit - windowBase;

   669         JS_ASSERT(windowLength <= windowRadius * 2);

   671         // Create the windowed strings.

   672         StringBuffer windowBuf(cx);

   673         if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0))

   674             return false;

   676         // Unicode and char versions of the window into the offending source

   677         // line, without final \n.

   678         err.report.uclinebuf = windowBuf.extractWellSized();

   679         if (!err.report.uclinebuf)

   680             return false;

   681         TwoByteChars tbchars(err.report.uclinebuf, windowLength);

   682         err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str();

   683         if (!err.report.linebuf)

   684             return false;

   686         err.report.tokenptr = err.report.linebuf + windowOffset;

   687         err.report.uctokenptr = err.report.uclinebuf + windowOffset;

   688     }

   690     if (cx->isJSContext())

   691         err.throwError(cx->asJSContext());

   693     return warning;

   694 }

   696 bool

   697 TokenStream::reportStrictModeError(unsigned errorNumber, ...)

   698 {

   699     va_list args;

   700     va_start(args, errorNumber);

   701     bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),

   702                                                 errorNumber, args);

   703     va_end(args);

   704     return result;

   705 }

   707 bool

   708 TokenStream::reportError(unsigned errorNumber, ...)

   709 {

   710     va_list args;

   711     va_start(args, errorNumber);

   712     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,

   713                                              args);

   714     va_end(args);

   715     return result;

   716 }

   718 bool

   719 TokenStream::reportWarning(unsigned errorNumber, ...)

   720 {

   721     va_list args;

   722     va_start(args, errorNumber);

   723     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,

   724                                              errorNumber, args);

   725     va_end(args);

   726     return result;

   727 }

   729 bool

   730 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)

   731 {

   732     if (!options().extraWarningsOption)

   733         return true;

   735     return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);

   736 }

   738 void

   739 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)

   740 {

   741     va_list args;

   742     va_start(args, errorNumber);

   743     reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, args);

   744     va_end(args);

   745 }

   747 // We have encountered a '\': check for a Unicode escape sequence after it.

   748 // Return 'true' and the character code value (by value) if we found a

   749 // Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not

   750 // advance along the buffer.

   751 bool

   752 TokenStream::peekUnicodeEscape(int *result)

   753 {

   754     jschar cp[5];

   756     if (peekChars(5, cp) && cp[0] == 'u' &&

   757         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&

   758         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))

   759     {

   760         *result = (((((JS7_UNHEX(cp[1]) << 4)

   761                 + JS7_UNHEX(cp[2])) << 4)

   762               + JS7_UNHEX(cp[3])) << 4)

   763             + JS7_UNHEX(cp[4]);

   764         return true;

   765     }

   766     return false;

   767 }

   769 bool

   770 TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)

   771 {

   772     if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {

   773         skipChars(5);

   774         return true;

   775     }

   776     return false;

   777 }

   779 bool

   780 TokenStream::matchUnicodeEscapeIdent(int32_t *cp)

   781 {

   782     if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {

   783         skipChars(5);

   784         return true;

   785     }

   786     return false;

   787 }

   789 // Helper function which returns true if the first length(q) characters in p are

   790 // the same as the characters in q.

   791 static bool

   792 CharsMatch(const jschar *p, const char *q) {

   793     while (*q) {

   794         if (*p++ != *q++)

   795             return false;

   796     }

   797     return true;

   798 }

   800 bool

   801 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)

   802 {

   803     // Match directive comments used in debugging, such as "//# sourceURL" and

   804     // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.

   805     //

   806     // To avoid a crashing bug in IE, several JavaScript transpilers wrap single

   807     // line comments containing a source mapping URL inside a multiline

   808     // comment. To avoid potentially expensive lookahead and backtracking, we

   809     // only check for this case if we encounter a '#' character.

   811     if (!getDisplayURL(isMultiline, shouldWarnDeprecated))

   812         return false;

   813     if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))

   814         return false;

   816     return true;

   817 }

   819 bool

   820 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,

   821                           const char *directive, int directiveLength,

   822                           const char *errorMsgPragma, jschar **destination) {

   823     JS_ASSERT(directiveLength <= 18);

   824     jschar peeked[18];

   825     int32_t c;

   827     if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {

   828         if (shouldWarnDeprecated &&

   829             !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))

   830             return false;

   832         skipChars(directiveLength);

   833         tokenbuf.clear();

   835         while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {

   836             getChar();

   837             // Debugging directives can occur in both single- and multi-line

   838             // comments. If we're currently inside a multi-line comment, we also

   839             // need to recognize multi-line comment terminators.

   840             if (isMultiline && c == '*' && peekChar() == '/') {

   841                 ungetChar('*');

   842                 break;

   843             }

   844             tokenbuf.append(c);

   845         }

   847         if (tokenbuf.empty())

   848             // The directive's URL was missing, but this is not quite an

   849             // exception that we should stop and drop everything for.

   850             return true;

   852         size_t length = tokenbuf.length();

   854         js_free(*destination);

   855         *destination = cx->pod_malloc<jschar>(length + 1);

   856         if (!*destination)

   857             return false;

   859         PodCopy(*destination, tokenbuf.begin(), length);

   860         (*destination)[length] = '\0';

   861     }

   863     return true;

   864 }

   866 bool

   867 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)

   868 {

   869     // Match comments of the form "//# sourceURL=<url>" or

   870     // "/\* //# sourceURL=<url> *\/"

   871     //

   872     // Note that while these are labeled "sourceURL" in the source text,

   873     // internally we refer to it as a "displayURL" to distinguish what the

   874     // developer would like to refer to the source as from the source's actual

   875     // URL.

   877     return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,

   878                         "sourceURL", &displayURL_);

   879 }

   881 bool

   882 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)

   883 {

   884     // Match comments of the form "//# sourceMappingURL=<url>" or

   885     // "/\* //# sourceMappingURL=<url> *\/"

   887     return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,

   888                         "sourceMappingURL", &sourceMapURL_);

   889 }

   891 MOZ_ALWAYS_INLINE Token *

   892 TokenStream::newToken(ptrdiff_t adjust)

   893 {

   894     cursor = (cursor + 1) & ntokensMask;

   895     Token *tp = &tokens[cursor];

   896     tp->pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base();

   898     // NOTE: tp->pos.end is not set until the very end of getTokenInternal().

   899     MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));

   901     return tp;

   902 }

   904 MOZ_ALWAYS_INLINE JSAtom *

   905 TokenStream::atomize(ExclusiveContext *cx, CharBuffer &cb)

   906 {

   907     return AtomizeChars(cx, cb.begin(), cb.length());

   908 }

   #ifdef DEBUG
   // Debug-only consistency check for a finished token: its kind must lie in
   // [TOK_ERROR, TOK_LIMIT) and its end offset must not precede its start.
   static bool
   IsTokenSane(Token *tp)
   {
       const bool kindInRange = tp->type >= TOK_ERROR && tp->type < TOK_LIMIT;

       // Nb: TOK_EOL should never be used in an actual Token;  it should only be
       // returned as a TokenKind from peekTokenSameLine().
       if (!kindInRange || tp->type == TOK_EOL)
           return false;

       return tp->pos.end >= tp->pos.begin;
   }
   #endif

   926 bool

   927 TokenStream::putIdentInTokenbuf(const jschar *identStart)

   928 {

   929     int32_t c, qc;

   930     const jschar *tmp = userbuf.addressOfNextRawChar();

   931     userbuf.setAddressOfNextRawChar(identStart);

   933     tokenbuf.clear();

   934     for (;;) {

   935         c = getCharIgnoreEOL();

   936         if (!IsIdentifierPart(c)) {

   937             if (c != '\\' || !matchUnicodeEscapeIdent(&qc))

   938                 break;

   939             c = qc;

   940         }

   941         if (!tokenbuf.append(c)) {

   942             userbuf.setAddressOfNextRawChar(tmp);

   943             return false;

   944         }

   945     }

   946     userbuf.setAddressOfNextRawChar(tmp);

   947     return true;

   948 }

   950 bool

   951 TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp)

   952 {

   953     const KeywordInfo *kw = FindKeyword(s, length);

   954     if (!kw)

   955         return true;

   957     if (kw->tokentype == TOK_RESERVED)

   958         return reportError(JSMSG_RESERVED_ID, kw->chars);

   960     if (kw->tokentype != TOK_STRICT_RESERVED) {

   961         if (kw->version <= versionNumber()) {

   962             // Working keyword.

   963             if (ttp) {

   964                 *ttp = kw->tokentype;

   965                 return true;

   966             }

   967             return reportError(JSMSG_RESERVED_ID, kw->chars);

   968         }

   970         // The keyword is not in this version. Treat it as an identifier, unless

   971         // it is let which we treat as TOK_STRICT_RESERVED by falling through to

   972         // the code below (ES5 forbids it in strict mode).

   973         if (kw->tokentype != TOK_LET)

   974             return true;

   975     }

   977     // Strict reserved word.

   978     return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);

   979 }

   981 enum FirstCharKind {

   982     // A jschar has the 'OneChar' kind if it, by itself, constitutes a valid

   983     // token that cannot also be a prefix of a longer token.  E.g. ';' has the

   984     // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens

   985     // that begin with '+'.

   986     //

   987     // The few token kinds satisfying these properties cover roughly 35--45%

   988     // of the tokens seen in practice.

   989     //

   990     // We represent the 'OneChar' kind with any positive value less than

   991     // TOK_LIMIT.  This representation lets us associate each one-char token

   992     // jschar with a TokenKind and thus avoid a subsequent jschar-to-TokenKind

   993     // conversion.

   994     OneChar_Min = 0,

   995     OneChar_Max = TOK_LIMIT - 1,

   997     Space = TOK_LIMIT,

   998     Ident,

   999     Dec,

  1000     String,

  1001     EOL,

  1002     BasePrefix,

  1003     Other,

  1005     LastCharKind = Other

  1006 };

  1008 // OneChar: 40,  41,  44,  58,  59,  63,  91,  93,  123, 125, 126:

  1009 //          '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'

  1010 // Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'

  1011 // Dot:     46: '.'

  1012 // Equals:  61: '='

  1013 // String:  34, 39: '"', '\''

  1014 // Dec:     49..57: '1'..'9'

  1015 // Plus:    43: '+'

  1016 // BasePrefix:  48: '0'

  1017 // Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '

  1018 // EOL:     10, 13: '\n', '\r'

  1019 //

  1020 #define T_COMMA     TOK_COMMA

  1021 #define T_COLON     TOK_COLON

  1022 #define T_BITNOT    TOK_BITNOT

  1023 #define _______ Other

  1024 static const uint8_t firstCharKinds[] = {

  1025 /*         0        1        2        3        4        5        6        7        8        9    */

  1026 /*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,

  1027 /*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,

  1028 /*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,

  1029 /*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,

  1030 /*  40+ */  TOK_LP,  TOK_RP, _______, _______, T_COMMA,_______,  _______, _______,BasePrefix,  Dec,

  1031 /*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,    Dec,  T_COLON,TOK_SEMI,

  1032 /*  60+ */ _______, _______, _______,TOK_HOOK, _______,   Ident,   Ident,   Ident,   Ident,   Ident,

  1033 /*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

  1034 /*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

  1035 /*  90+ */   Ident,  TOK_LB, _______,  TOK_RB, _______,   Ident, _______,   Ident,   Ident,   Ident,

  1036 /* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

  1037 /* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,

  1038 /* 120+ */   Ident,   Ident,   Ident,  TOK_LC, _______,  TOK_RC,T_BITNOT, _______

  1039 };

  1040 #undef T_COMMA

  1041 #undef T_COLON

  1042 #undef T_BITNOT

  1043 #undef _______

  1045 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),

  1046               "Elements of firstCharKinds[] are too small");

  1048 TokenKind

  1049 TokenStream::getTokenInternal(Modifier modifier)

  1050 {

  1051     int c, qc;

  1052     Token *tp;

  1053     FirstCharKind c1kind;

  1054     const jschar *numStart;

  1055     bool hasExp;

  1056     DecimalPoint decimalPoint;

  1057     const jschar *identStart;

  1058     bool hadUnicodeEscape;

  1060   retry:

  1061     if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {

  1062         tp = newToken(0);

  1063         tp->type = TOK_EOF;

  1064         flags.isEOF = true;

  1065         goto out;

  1066     }

  1068     c = userbuf.getRawChar();

  1069     JS_ASSERT(c != EOF);

  1071     // Chars not in the range 0..127 are rare.  Getting them out of the way

  1072     // early allows subsequent checking to be faster.

  1073     if (MOZ_UNLIKELY(c >= 128)) {

  1074         if (IsSpaceOrBOM2(c)) {

  1075             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {

  1076                 updateLineInfoForEOL();

  1077                 updateFlagsForEOL();

  1078             }

  1080             goto retry;

  1081         }

  1083         tp = newToken(-1);

  1085         // '$' and '_' don't pass IsLetter, but they're < 128 so never appear here.

  1086         JS_STATIC_ASSERT('$' < 128 && '_' < 128);

  1087         if (IsLetter(c)) {

  1088             identStart = userbuf.addressOfNextRawChar() - 1;

  1089             hadUnicodeEscape = false;

  1090             goto identifier;

  1091         }

  1093         goto badchar;

  1094     }

  1096     // Get the token kind, based on the first char.  The ordering of c1kind

  1097     // comparison is based on the frequency of tokens in real code -- Parsemark

  1098     // (which represents typical JS code on the web) and the Unreal demo (which

  1099     // represents asm.js code).

  1100     //

  1101     //                  Parsemark   Unreal

  1102     //  OneChar         32.9%       39.7%

  1103     //  Space           25.0%        0.6%

  1104     //  Ident           19.2%       36.4%

  1105     //  Dec              7.2%        5.1%

  1106     //  String           7.9%        0.0%

  1107     //  EOL              1.7%        0.0%

  1108     //  BasePrefix       0.4%        4.9%

  1109     //  Other            5.7%       13.3%

  1110     //

  1111     // The ordering is based mostly on Parsemark frequencies, with Unreal

  1112     // frequencies used to break close categories (e.g. |Dec| and |String|).

  1113     // |Other| is biggish, but no other token kind is common enough for it to

  1114     // be worth adding extra values to FirstCharKind.

  1115     //

  1116     c1kind = FirstCharKind(firstCharKinds[c]);

  1118     // Look for an unambiguous single-char token.

  1119     //

  1120     if (c1kind <= OneChar_Max) {

  1121         tp = newToken(-1);

  1122         tp->type = TokenKind(c1kind);

  1123         goto out;

  1124     }

  1126     // Skip over non-EOL whitespace chars.

  1127     //

  1128     if (c1kind == Space)

  1129         goto retry;

  1131     // Look for an identifier.

  1132     //

  1133     if (c1kind == Ident) {

  1134         tp = newToken(-1);

  1135         identStart = userbuf.addressOfNextRawChar() - 1;

  1136         hadUnicodeEscape = false;

  1138       identifier:

  1139         for (;;) {

  1140             c = getCharIgnoreEOL();

  1141             if (c == EOF)

  1142                 break;

  1143             if (!IsIdentifierPart(c)) {

  1144                 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))

  1145                     break;

  1146                 hadUnicodeEscape = true;

  1147             }

  1148         }

  1149         ungetCharIgnoreEOL(c);

  1151         // Identifiers containing no Unicode escapes can be processed directly

  1152         // from userbuf.  The rest must use the escapes converted via tokenbuf

  1153         // before atomizing.

  1154         const jschar *chars;

  1155         size_t length;

  1156         if (hadUnicodeEscape) {

  1157             if (!putIdentInTokenbuf(identStart))

  1158                 goto error;

  1160             chars = tokenbuf.begin();

  1161             length = tokenbuf.length();

  1162         } else {

  1163             chars = identStart;

  1164             length = userbuf.addressOfNextRawChar() - identStart;

  1165         }

  1167         // Check for keywords unless the parser told us not to.

  1168         if (modifier != KeywordIsName) {

  1169             tp->type = TOK_NAME;

  1170             if (!checkForKeyword(chars, length, &tp->type))

  1171                 goto error;

  1172             if (tp->type != TOK_NAME)

  1173                 goto out;

  1174         }

  1176         JSAtom *atom = AtomizeChars(cx, chars, length);

  1177         if (!atom)

  1178             goto error;

  1179         tp->type = TOK_NAME;

  1180         tp->setName(atom->asPropertyName());

  1181         goto out;

  1182     }

  1184     // Look for a decimal number.

  1185     //

  1186     if (c1kind == Dec) {

  1187         tp = newToken(-1);

  1188         numStart = userbuf.addressOfNextRawChar() - 1;

  1190       decimal:

  1191         decimalPoint = NoDecimal;

  1192         hasExp = false;

  1193         while (JS7_ISDEC(c))

  1194             c = getCharIgnoreEOL();

  1196         if (c == '.') {

  1197             decimalPoint = HasDecimal;

  1198           decimal_dot:

  1199             do {

  1200                 c = getCharIgnoreEOL();

  1201             } while (JS7_ISDEC(c));

  1202         }

  1203         if (c == 'e' || c == 'E') {

  1204             hasExp = true;

  1205             c = getCharIgnoreEOL();

  1206             if (c == '+' || c == '-')

  1207                 c = getCharIgnoreEOL();

  1208             if (!JS7_ISDEC(c)) {

  1209                 ungetCharIgnoreEOL(c);

  1210                 reportError(JSMSG_MISSING_EXPONENT);

  1211                 goto error;

  1212             }

  1213             do {

  1214                 c = getCharIgnoreEOL();

  1215             } while (JS7_ISDEC(c));

  1216         }

  1217         ungetCharIgnoreEOL(c);

  1219         if (c != EOF && IsIdentifierStart(c)) {

  1220             reportError(JSMSG_IDSTART_AFTER_NUMBER);

  1221             goto error;

  1222         }

  1224         // Unlike identifiers and strings, numbers cannot contain escaped

  1225         // chars, so we don't need to use tokenbuf.  Instead we can just

  1226         // convert the jschars in userbuf directly to the numeric value.

  1227         double dval;

  1228         if (!((decimalPoint == HasDecimal) || hasExp)) {

  1229             if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))

  1230                 goto error;

  1231         } else {

  1232             const jschar *dummy;

  1233             if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))

  1234                 goto error;

  1235         }

  1236         tp->type = TOK_NUMBER;

  1237         tp->setNumber(dval, decimalPoint);

  1238         goto out;

  1239     }

  1241     // Look for a string.

  1242     //

  1243     if (c1kind == String) {

  1244         tp = newToken(-1);

  1245         qc = c;

  1246         tokenbuf.clear();

  1247         while (true) {

  1248             // We need to detect any of these chars:  " or ', \n (or its

  1249             // equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner

  1250             // similar to maybeEOL[], see above.  Because we detect EOL

  1251             // sequences here and put them back immediately, we can use

  1252             // getCharIgnoreEOL().

  1253             c = getCharIgnoreEOL();

  1254             if (maybeStrSpecial[c & 0xff]) {

  1255                 if (c == qc)

  1256                     break;

  1257                 if (c == '\\') {

  1258                     switch (c = getChar()) {

  1259                       case 'b': c = '\b'; break;

  1260                       case 'f': c = '\f'; break;

  1261                       case 'n': c = '\n'; break;

  1262                       case 'r': c = '\r'; break;

  1263                       case 't': c = '\t'; break;

  1264                       case 'v': c = '\v'; break;

  1266                       default:

  1267                         if ('0' <= c && c < '8') {

  1268                             int32_t val = JS7_UNDEC(c);

  1270                             c = peekChar();

  1271                             // Strict mode code allows only \0, then a non-digit.

  1272                             if (val != 0 || JS7_ISDEC(c)) {

  1273                                 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))

  1274                                     goto error;

  1275                                 flags.sawOctalEscape = true;

  1276                             }

  1277                             if ('0' <= c && c < '8') {

  1278                                 val = 8 * val + JS7_UNDEC(c);

  1279                                 getChar();

  1280                                 c = peekChar();

  1281                                 if ('0' <= c && c < '8') {

  1282                                     int32_t save = val;

  1283                                     val = 8 * val + JS7_UNDEC(c);

  1284                                     if (val <= 0377)

  1285                                         getChar();

  1286                                     else

  1287                                         val = save;

  1288                                 }

  1289                             }

  1291                             c = jschar(val);

  1292                         } else if (c == 'u') {

  1293                             jschar cp[4];

  1294                             if (peekChars(4, cp) &&

  1295                                 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&

  1296                                 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {

  1297                                 c = (((((JS7_UNHEX(cp[0]) << 4)

  1298                                         + JS7_UNHEX(cp[1])) << 4)

  1299                                       + JS7_UNHEX(cp[2])) << 4)

  1300                                     + JS7_UNHEX(cp[3]);

  1301                                 skipChars(4);

  1302                             } else {

  1303                                 reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");

  1304                                 goto error;

  1305                             }

  1306                         } else if (c == 'x') {

  1307                             jschar cp[2];

  1308                             if (peekChars(2, cp) &&

  1309                                 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {

  1310                                 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);

  1311                                 skipChars(2);

  1312                             } else {

  1313                                 reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");

  1314                                 goto error;

  1315                             }

  1316                         } else if (c == '\n') {

  1317                             // ES5 7.8.4: an escaped line terminator represents

  1318                             // no character.

  1319                             continue;

  1320                         }

  1321                         break;

  1322                     }

  1323                 } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {

  1324                     ungetCharIgnoreEOL(c);

  1325                     reportError(JSMSG_UNTERMINATED_STRING);

  1326                     goto error;

  1327                 }

  1328             }

  1329             if (!tokenbuf.append(c))

  1330                 goto error;

  1331         }

  1332         JSAtom *atom = atomize(cx, tokenbuf);

  1333         if (!atom)

  1334             goto error;

  1335         tp->type = TOK_STRING;

  1336         tp->setAtom(atom);

  1337         goto out;

  1338     }

  1340     // Skip over EOL chars, updating line state along the way.

  1341     //

  1342     if (c1kind == EOL) {

  1343         // If it's a \r\n sequence: treat as a single EOL, skip over the \n.

  1344         if (c == '\r' && userbuf.hasRawChars())

  1345             userbuf.matchRawChar('\n');

  1346         updateLineInfoForEOL();

  1347         updateFlagsForEOL();

  1348         goto retry;

  1349     }

  1351     // Look for a hexadecimal, octal, or binary number.

  1352     //

  1353     if (c1kind == BasePrefix) {

  1354         tp = newToken(-1);

  1355         int radix;

  1356         c = getCharIgnoreEOL();

  1357         if (c == 'x' || c == 'X') {

  1358             radix = 16;

  1359             c = getCharIgnoreEOL();

  1360             if (!JS7_ISHEX(c)) {

  1361                 ungetCharIgnoreEOL(c);

  1362                 reportError(JSMSG_MISSING_HEXDIGITS);

  1363                 goto error;

  1364             }

  1365             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0x'

  1366             while (JS7_ISHEX(c))

  1367                 c = getCharIgnoreEOL();

  1368         } else if (c == 'b' || c == 'B') {

  1369             radix = 2;

  1370             c = getCharIgnoreEOL();

  1371             if (c != '0' && c != '1') {

  1372                 ungetCharIgnoreEOL(c);

  1373                 reportError(JSMSG_MISSING_BINARY_DIGITS);

  1374                 goto error;

  1375             }

  1376             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0b'

  1377             while (c == '0' || c == '1')

  1378                 c = getCharIgnoreEOL();

  1379         } else if (c == 'o' || c == 'O') {

  1380             radix = 8;

  1381             c = getCharIgnoreEOL();

  1382             if (c < '0' || c > '7') {

  1383                 ungetCharIgnoreEOL(c);

  1384                 reportError(JSMSG_MISSING_OCTAL_DIGITS);

  1385                 goto error;

  1386             }

  1387             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0o'

  1388             while ('0' <= c && c <= '7')

  1389                 c = getCharIgnoreEOL();

  1390         } else if (JS7_ISDEC(c)) {

  1391             radix = 8;

  1392             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0'

  1393             while (JS7_ISDEC(c)) {

  1394                 // Octal integer literals are not permitted in strict mode code.

  1395                 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))

  1396                     goto error;

  1398                 // Outside strict mode, we permit 08 and 09 as decimal numbers,

  1399                 // which makes our behaviour a superset of the ECMA numeric

  1400                 // grammar. We might not always be so permissive, so we warn

  1401                 // about it.

  1402                 if (c >= '8') {

  1403                     if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {

  1404                         goto error;

  1405                     }

  1406                     goto decimal;   // use the decimal scanner for the rest of the number

  1407                 }

  1408                 c = getCharIgnoreEOL();

  1409             }

  1410         } else {

  1411             // '0' not followed by a radix prefix (x/b/o) or a digit;  scan as a decimal number.

  1412             numStart = userbuf.addressOfNextRawChar() - 1;

  1413             goto decimal;

  1414         }

  1415         ungetCharIgnoreEOL(c);

  1417         if (c != EOF && IsIdentifierStart(c)) {

  1418             reportError(JSMSG_IDSTART_AFTER_NUMBER);

  1419             goto error;

  1420         }

  1422         double dval;

  1423         const jschar *dummy;

  1424         if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))

  1425             goto error;

  1426         tp->type = TOK_NUMBER;

  1427         tp->setNumber(dval, NoDecimal);

  1428         goto out;

  1429     }

  1431     // This handles everything else.

  1432     //

  1433     JS_ASSERT(c1kind == Other);

  1434     tp = newToken(-1);

  1435     switch (c) {

  1436       case '.':

  1437         c = getCharIgnoreEOL();

  1438         if (JS7_ISDEC(c)) {

  1439             numStart = userbuf.addressOfNextRawChar() - 2;

  1440             decimalPoint = HasDecimal;

  1441             hasExp = false;

  1442             goto decimal_dot;

  1443         }

  1444         if (c == '.') {

  1445             if (matchChar('.')) {

  1446                 tp->type = TOK_TRIPLEDOT;

  1447                 goto out;

  1448             }

  1449         }

  1450         ungetCharIgnoreEOL(c);

  1451         tp->type = TOK_DOT;

  1452         goto out;

  1454       case '=':

  1455         if (matchChar('='))

  1456             tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;

  1457         else if (matchChar('>'))

  1458             tp->type = TOK_ARROW;

  1459         else

  1460             tp->type = TOK_ASSIGN;

  1461         goto out;

  1463       case '+':

  1464         if (matchChar('+'))

  1465             tp->type = TOK_INC;

  1466         else

  1467             tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;

  1468         goto out;

  1470       case '\\':

  1471         hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);

  1472         if (hadUnicodeEscape) {

  1473             identStart = userbuf.addressOfNextRawChar() - 6;

  1474             goto identifier;

  1475         }

  1476         goto badchar;

  1478       case '|':

  1479         if (matchChar('|'))

  1480             tp->type = TOK_OR;

  1481         else

  1482             tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;

  1483         goto out;

  1485       case '^':

  1486         tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;

  1487         goto out;

  1489       case '&':

  1490         if (matchChar('&'))

  1491             tp->type = TOK_AND;

  1492         else

  1493             tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;

  1494         goto out;

  1496       case '!':

  1497         if (matchChar('='))

  1498             tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;

  1499         else

  1500             tp->type = TOK_NOT;

  1501         goto out;

  1503       case '<':

  1504         // NB: treat HTML begin-comment as comment-till-end-of-line.

  1505         if (matchChar('!')) {

  1506             if (matchChar('-')) {

  1507                 if (matchChar('-'))

  1508                     goto skipline;

  1509                 ungetChar('-');

  1510             }

  1511             ungetChar('!');

  1512         }

  1513         if (matchChar('<')) {

  1514             tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;

  1515         } else {

  1516             tp->type = matchChar('=') ? TOK_LE : TOK_LT;

  1517         }

  1518         goto out;

  1520       case '>':

  1521         if (matchChar('>')) {

  1522             if (matchChar('>'))

  1523                 tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;

  1524             else

  1525                 tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;

  1526         } else {

  1527             tp->type = matchChar('=') ? TOK_GE : TOK_GT;

  1528         }

  1529         goto out;

  1531       case '*':

  1532         tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;

  1533         goto out;

  1535       case '/':

  1536         // Look for a single-line comment.

  1537         if (matchChar('/')) {

  1538             c = peekChar();

  1539             if (c == '@' || c == '#') {

  1540                 bool shouldWarn = getChar() == '@';

  1541                 if (!getDirectives(false, shouldWarn))

  1542                     goto error;

  1543             }

  1545         skipline:

  1546             while ((c = getChar()) != EOF && c != '\n')

  1547                 continue;

  1548             ungetChar(c);

  1549             cursor = (cursor - 1) & ntokensMask;

  1550             goto retry;

  1551         }

  1553         // Look for a multi-line comment.

  1554         if (matchChar('*')) {

  1555             unsigned linenoBefore = lineno;

  1556             while ((c = getChar()) != EOF &&

  1557                    !(c == '*' && matchChar('/'))) {

  1558                 if (c == '@' || c == '#') {

  1559                     bool shouldWarn = c == '@';

  1560                     if (!getDirectives(true, shouldWarn))

  1561                         goto error;

  1562                 }

  1563             }

  1564             if (c == EOF) {

  1565                 reportError(JSMSG_UNTERMINATED_COMMENT);

  1566                 goto error;

  1567             }

  1568             if (linenoBefore != lineno)

  1569                 updateFlagsForEOL();

  1570             cursor = (cursor - 1) & ntokensMask;

  1571             goto retry;

  1572         }

  1574         // Look for a regexp.

  1575         if (modifier == Operand) {

  1576             tokenbuf.clear();

  1578             bool inCharClass = false;

  1579             for (;;) {

  1580                 c = getChar();

  1581                 if (c == '\\') {

  1582                     if (!tokenbuf.append(c))

  1583                         goto error;

  1584                     c = getChar();

  1585                 } else if (c == '[') {

  1586                     inCharClass = true;

  1587                 } else if (c == ']') {

  1588                     inCharClass = false;

  1589                 } else if (c == '/' && !inCharClass) {

  1590                     // For compat with IE, allow unescaped / in char classes.

  1591                     break;

  1592                 }

  1593                 if (c == '\n' || c == EOF) {

  1594                     ungetChar(c);

  1595                     reportError(JSMSG_UNTERMINATED_REGEXP);

  1596                     goto error;

  1597                 }

  1598                 if (!tokenbuf.append(c))

  1599                     goto error;

  1600             }

  1602             RegExpFlag reflags = NoFlags;

  1603             unsigned length = tokenbuf.length() + 1;

  1604             while (true) {

  1605                 c = peekChar();

  1606                 if (c == 'g' && !(reflags & GlobalFlag))

  1607                     reflags = RegExpFlag(reflags | GlobalFlag);

  1608                 else if (c == 'i' && !(reflags & IgnoreCaseFlag))

  1609                     reflags = RegExpFlag(reflags | IgnoreCaseFlag);

  1610                 else if (c == 'm' && !(reflags & MultilineFlag))

  1611                     reflags = RegExpFlag(reflags | MultilineFlag);

  1612                 else if (c == 'y' && !(reflags & StickyFlag))

  1613                     reflags = RegExpFlag(reflags | StickyFlag);

  1614                 else

  1615                     break;

  1616                 getChar();

  1617                 length++;

  1618             }

  1620             c = peekChar();

  1621             if (JS7_ISLET(c)) {

  1622                 char buf[2] = { '\0', '\0' };

  1623                 tp->pos.begin += length + 1;

  1624                 buf[0] = char(c);

  1625                 reportError(JSMSG_BAD_REGEXP_FLAG, buf);

  1626                 (void) getChar();

  1627                 goto error;

  1628             }

  1629             tp->type = TOK_REGEXP;

  1630             tp->setRegExpFlags(reflags);

  1631             goto out;

  1632         }

  1634         tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;

  1635         goto out;

  1637       case '%':

  1638         tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;

  1639         goto out;

  1641       case '-':

  1642         if (matchChar('-')) {

  1643             if (peekChar() == '>' && !flags.isDirtyLine)

  1644                 goto skipline;

  1645             tp->type = TOK_DEC;

  1646         } else {

  1647             tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;

  1648         }

  1649         goto out;

  1651       badchar:

  1652       default:

  1653         reportError(JSMSG_ILLEGAL_CHARACTER);

  1654         goto error;

  1655     }

  1657     MOZ_ASSUME_UNREACHABLE("should have jumped to |out| or |error|");

  1659   out:

  1660     flags.isDirtyLine = true;

  1661     tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();

  1662     JS_ASSERT(IsTokenSane(tp));

  1663     return tp->type;

  1665   error:

  1666     flags.isDirtyLine = true;

  1667     tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();

  1668     tp->type = TOK_ERROR;

  1669     JS_ASSERT(IsTokenSane(tp));

  1670     onError();

  1671     return TOK_ERROR;

  1672 }

  1674 void

  1675 TokenStream::onError()

  1676 {

  1677     flags.hadError = true;

  1678 #ifdef DEBUG

  1679     // Poisoning userbuf on error establishes an invariant: once an erroneous

  1680     // token has been seen, userbuf will not be consulted again.  This is true

  1681     // because the parser will either (a) deal with the TOK_ERROR token by

  1682     // aborting parsing immediately; or (b) if the TOK_ERROR token doesn't

  1683     // match what it expected, it will unget the token, and the next getToken()

  1684     // call will immediately return the just-gotten TOK_ERROR token again

  1685     // without consulting userbuf, thanks to the lookahead buffer.

  1686     userbuf.poison();

  1687 #endif

  1688 }

  1690 JS_FRIEND_API(int)

  1691 js_fgets(char *buf, int size, FILE *file)

  1692 {

  1693     int n, i, c;

  1694     bool crflag;

  1696     n = size - 1;

  1697     if (n < 0)

  1698         return -1;

  1700     crflag = false;

  1701     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {

  1702         buf[i] = c;

  1703         if (c == '\n') {        // any \n ends a line

  1704             i++;                // keep the \n; we know there is room for \0

  1705             break;

  1706         }

  1707         if (crflag) {           // \r not followed by \n ends line at the \r

  1708             ungetc(c, file);

  1709             break;              // and overwrite c in buf with \0

  1710         }

  1711         crflag = (c == '\r');

  1712     }

  1714     buf[i] = '\0';

  1715     return i;

  1716 }

  1718 #ifdef DEBUG

  1719 const char *

  1720 TokenKindToString(TokenKind tt)

  1721 {

  1722     switch (tt) {

  1723       case TOK_ERROR:           return "TOK_ERROR";

  1724       case TOK_EOF:             return "TOK_EOF";

  1725       case TOK_EOL:             return "TOK_EOL";

  1726       case TOK_SEMI:            return "TOK_SEMI";

  1727       case TOK_COMMA:           return "TOK_COMMA";

  1728       case TOK_HOOK:            return "TOK_HOOK";

  1729       case TOK_COLON:           return "TOK_COLON";

  1730       case TOK_OR:              return "TOK_OR";

  1731       case TOK_AND:             return "TOK_AND";

  1732       case TOK_BITOR:           return "TOK_BITOR";

  1733       case TOK_BITXOR:          return "TOK_BITXOR";

  1734       case TOK_BITAND:          return "TOK_BITAND";

  1735       case TOK_ADD:             return "TOK_ADD";

  1736       case TOK_SUB:             return "TOK_SUB";

  1737       case TOK_MUL:             return "TOK_MUL";

  1738       case TOK_DIV:             return "TOK_DIV";

  1739       case TOK_MOD:             return "TOK_MOD";

  1740       case TOK_INC:             return "TOK_INC";

  1741       case TOK_DEC:             return "TOK_DEC";

  1742       case TOK_DOT:             return "TOK_DOT";

  1743       case TOK_TRIPLEDOT:       return "TOK_TRIPLEDOT";

  1744       case TOK_LB:              return "TOK_LB";

  1745       case TOK_RB:              return "TOK_RB";

  1746       case TOK_LC:              return "TOK_LC";

  1747       case TOK_RC:              return "TOK_RC";

  1748       case TOK_LP:              return "TOK_LP";

  1749       case TOK_RP:              return "TOK_RP";

  1750       case TOK_ARROW:           return "TOK_ARROW";

  1751       case TOK_NAME:            return "TOK_NAME";

  1752       case TOK_NUMBER:          return "TOK_NUMBER";

  1753       case TOK_STRING:          return "TOK_STRING";

  1754       case TOK_REGEXP:          return "TOK_REGEXP";

  1755       case TOK_TRUE:            return "TOK_TRUE";

  1756       case TOK_FALSE:           return "TOK_FALSE";

  1757       case TOK_NULL:            return "TOK_NULL";

  1758       case TOK_THIS:            return "TOK_THIS";

  1759       case TOK_FUNCTION:        return "TOK_FUNCTION";

  1760       case TOK_IF:              return "TOK_IF";

  1761       case TOK_ELSE:            return "TOK_ELSE";

  1762       case TOK_SWITCH:          return "TOK_SWITCH";

  1763       case TOK_CASE:            return "TOK_CASE";

  1764       case TOK_DEFAULT:         return "TOK_DEFAULT";

  1765       case TOK_WHILE:           return "TOK_WHILE";

  1766       case TOK_DO:              return "TOK_DO";

  1767       case TOK_FOR:             return "TOK_FOR";

  1768       case TOK_BREAK:           return "TOK_BREAK";

  1769       case TOK_CONTINUE:        return "TOK_CONTINUE";

  1770       case TOK_IN:              return "TOK_IN";

  1771       case TOK_VAR:             return "TOK_VAR";

  1772       case TOK_CONST:           return "TOK_CONST";

  1773       case TOK_WITH:            return "TOK_WITH";

  1774       case TOK_RETURN:          return "TOK_RETURN";

  1775       case TOK_NEW:             return "TOK_NEW";

  1776       case TOK_DELETE:          return "TOK_DELETE";

  1777       case TOK_TRY:             return "TOK_TRY";

  1778       case TOK_CATCH:           return "TOK_CATCH";

  1779       case TOK_FINALLY:         return "TOK_FINALLY";

  1780       case TOK_THROW:           return "TOK_THROW";

  1781       case TOK_INSTANCEOF:      return "TOK_INSTANCEOF";

  1782       case TOK_DEBUGGER:        return "TOK_DEBUGGER";

  1783       case TOK_YIELD:           return "TOK_YIELD";

  1784       case TOK_LET:             return "TOK_LET";

  1785       case TOK_RESERVED:        return "TOK_RESERVED";

  1786       case TOK_STRICT_RESERVED: return "TOK_STRICT_RESERVED";

  1787       case TOK_STRICTEQ:        return "TOK_STRICTEQ";

  1788       case TOK_EQ:              return "TOK_EQ";

  1789       case TOK_STRICTNE:        return "TOK_STRICTNE";

  1790       case TOK_NE:              return "TOK_NE";

  1791       case TOK_TYPEOF:          return "TOK_TYPEOF";

  1792       case TOK_VOID:            return "TOK_VOID";

  1793       case TOK_NOT:             return "TOK_NOT";

  1794       case TOK_BITNOT:          return "TOK_BITNOT";

  1795       case TOK_LT:              return "TOK_LT";

  1796       case TOK_LE:              return "TOK_LE";

  1797       case TOK_GT:              return "TOK_GT";

  1798       case TOK_GE:              return "TOK_GE";

  1799       case TOK_LSH:             return "TOK_LSH";

  1800       case TOK_RSH:             return "TOK_RSH";

  1801       case TOK_URSH:            return "TOK_URSH";

  1802       case TOK_ASSIGN:          return "TOK_ASSIGN";

  1803       case TOK_ADDASSIGN:       return "TOK_ADDASSIGN";

  1804       case TOK_SUBASSIGN:       return "TOK_SUBASSIGN";

  1805       case TOK_BITORASSIGN:     return "TOK_BITORASSIGN";

  1806       case TOK_BITXORASSIGN:    return "TOK_BITXORASSIGN";

  1807       case TOK_BITANDASSIGN:    return "TOK_BITANDASSIGN";

  1808       case TOK_LSHASSIGN:       return "TOK_LSHASSIGN";

  1809       case TOK_RSHASSIGN:       return "TOK_RSHASSIGN";

  1810       case TOK_URSHASSIGN:      return "TOK_URSHASSIGN";

  1811       case TOK_MULASSIGN:       return "TOK_MULASSIGN";

  1812       case TOK_DIVASSIGN:       return "TOK_DIVASSIGN";

  1813       case TOK_MODASSIGN:       return "TOK_MODASSIGN";

  1814       case TOK_EXPORT:          return "TOK_EXPORT";

  1815       case TOK_IMPORT:          return "TOK_IMPORT";

  1816       case TOK_LIMIT:           break;

  1817     }

  1819     return "<bad TokenKind>";

  1820 }

  1821 #endif

The Tor Browser / file revision

js/src/frontend/TokenStream.cpp@6474c204b198

js/src/frontend/TokenStream.cpp