js/src/frontend/TokenStream.cpp

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable the double-key logic when either private browsing
mode is active or the privacy.thirdparty.isolate preference is set, and
implement it in GetCookieStringCommon and FindCookie, where it counts.
One reservation remains: how to convince FindCookie callers to test the
condition themselves and pass a nullptr when the double-key logic is disabled.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     2  * vim: set ts=8 sts=4 et sw=4 tw=99:
     3  * This Source Code Form is subject to the terms of the Mozilla Public
     4  * License, v. 2.0. If a copy of the MPL was not distributed with this
     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     7 // JS lexical scanner.
     9 #include "frontend/TokenStream.h"
    11 #include "mozilla/PodOperations.h"
    13 #include <ctype.h>
    14 #include <stdarg.h>
    15 #include <stdio.h>
    16 #include <string.h>
    18 #include "jsatom.h"
    19 #include "jscntxt.h"
    20 #include "jsexn.h"
    21 #include "jsnum.h"
    22 #include "jsworkers.h"
    24 #include "frontend/BytecodeCompiler.h"
    25 #include "js/CharacterEncoding.h"
    26 #include "vm/Keywords.h"
    27 #include "vm/StringBuffer.h"
    29 using namespace js;
    30 using namespace js::frontend;
    31 using namespace js::unicode;
    33 using mozilla::Maybe;
    34 using mozilla::PodAssign;
    35 using mozilla::PodCopy;
    36 using mozilla::PodZero;
    38 struct KeywordInfo {
    39     const char  *chars;         // C string with keyword text
    40     TokenKind   tokentype;
    41     JSVersion   version;
    42 };
    44 static const KeywordInfo keywords[] = {
    45 #define KEYWORD_INFO(keyword, name, type, version) \
    46     {js_##keyword##_str, type, version},
    47     FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
    48 #undef KEYWORD_INFO
    49 };
    51 // Returns a KeywordInfo for the specified characters, or nullptr if the string
    52 // is not a keyword.
    53 static const KeywordInfo *
    54 FindKeyword(const jschar *s, size_t length)
    55 {
    56     JS_ASSERT(length != 0);
    58     size_t i;
    59     const KeywordInfo *kw;
    60     const char *chars;
    62 #define JSKW_LENGTH()           length
    63 #define JSKW_AT(column)         s[column]
    64 #define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
    65 #define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
    66 #define JSKW_NO_MATCH()         goto no_match;
    67 #include "jsautokw.h"
    68 #undef JSKW_NO_MATCH
    69 #undef JSKW_TEST_GUESS
    70 #undef JSKW_GOT_MATCH
    71 #undef JSKW_AT
    72 #undef JSKW_LENGTH
    74   got_match:
    75     return &keywords[i];
    77   test_guess:
    78     kw = &keywords[i];
    79     chars = kw->chars;
    80     do {
    81         if (*s++ != (unsigned char)(*chars++))
    82             goto no_match;
    83     } while (--length != 0);
    84     return kw;
    86   no_match:
    87     return nullptr;
    88 }
    90 bool
    91 frontend::IsIdentifier(JSLinearString *str)
    92 {
    93     const jschar *chars = str->chars();
    94     size_t length = str->length();
    96     if (length == 0)
    97         return false;
    98     jschar c = *chars;
    99     if (!IsIdentifierStart(c))
   100         return false;
   101     const jschar *end = chars + length;
   102     while (++chars != end) {
   103         c = *chars;
   104         if (!IsIdentifierPart(c))
   105             return false;
   106     }
   107     return true;
   108 }
   110 bool
   111 frontend::IsKeyword(JSLinearString *str)
   112 {
   113     return FindKeyword(str->chars(), str->length()) != nullptr;
   114 }
   116 TokenStream::SourceCoords::SourceCoords(ExclusiveContext *cx, uint32_t ln)
   117   : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
   118 {
   119     // This is actually necessary!  Removing it causes compile errors on
   120     // GCC and clang.  You could try declaring this:
   121     //
   122     //   const uint32_t TokenStream::SourceCoords::MAX_PTR;
   123     //
   124     // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
   125     //
   126     uint32_t maxPtr = MAX_PTR;
   128     // The first line begins at buffer offset 0.  MAX_PTR is the sentinel.  The
   129     // appends cannot fail because |lineStartOffsets_| has statically-allocated
   130     // elements.
   131     JS_ASSERT(lineStartOffsets_.capacity() >= 2);
   132     (void)lineStartOffsets_.reserve(2);
   133     lineStartOffsets_.infallibleAppend(0);
   134     lineStartOffsets_.infallibleAppend(maxPtr);
   135 }
   137 MOZ_ALWAYS_INLINE void
   138 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
   139 {
   140     uint32_t lineIndex = lineNumToIndex(lineNum);
   141     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
   143     JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
   145     if (lineIndex == sentinelIndex) {
   146         // We haven't seen this newline before.  Update lineStartOffsets_.
   147         // We ignore any failures due to OOM -- because we always have a
   148         // sentinel node, it'll just be like the newline wasn't present.  I.e.
   149         // the line numbers will be wrong, but the code won't crash or anything
   150         // like that.
   151         lineStartOffsets_[lineIndex] = lineStartOffset;
   153         uint32_t maxPtr = MAX_PTR;
   154         (void)lineStartOffsets_.append(maxPtr);
   156     } else {
   157         // We have seen this newline before (and ungot it).  Do nothing (other
   158         // than checking it hasn't mysteriously changed).
   159         JS_ASSERT(lineStartOffsets_[lineIndex] == lineStartOffset);
   160     }
   161 }
   163 MOZ_ALWAYS_INLINE bool
   164 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other)
   165 {
   166     JS_ASSERT(lineStartOffsets_.back() == MAX_PTR);
   167     JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
   169     if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
   170         return true;
   172     uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
   173     lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
   175     for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
   176         if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
   177             return false;
   178     }
   179     return true;
   180 }
   182 MOZ_ALWAYS_INLINE uint32_t
   183 TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
   184 {
   185     uint32_t iMin, iMax, iMid;
   187     if (lineStartOffsets_[lastLineIndex_] <= offset) {
   188         // If we reach here, offset is on a line the same as or higher than
   189         // last time.  Check first for the +0, +1, +2 cases, because they
   190         // typically cover 85--98% of cases.
   191         if (offset < lineStartOffsets_[lastLineIndex_ + 1])
   192             return lastLineIndex_;      // lineIndex is same as last time
   194         // If we reach here, there must be at least one more entry (plus the
   195         // sentinel).  Try it.
   196         lastLineIndex_++;
   197         if (offset < lineStartOffsets_[lastLineIndex_ + 1])
   198             return lastLineIndex_;      // lineIndex is one higher than last time
   200         // The same logic applies here.
   201         lastLineIndex_++;
   202         if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
   203             return lastLineIndex_;      // lineIndex is two higher than last time
   204         }
   206         // No luck.  Oh well, we have a better-than-default starting point for
   207         // the binary search.
   208         iMin = lastLineIndex_ + 1;
   209         JS_ASSERT(iMin < lineStartOffsets_.length() - 1);   // -1 due to the sentinel
   211     } else {
   212         iMin = 0;
   213     }
   215     // This is a binary search with deferred detection of equality, which was
   216     // marginally faster in this case than a standard binary search.
   217     // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
   218     // want one before that.
   219     iMax = lineStartOffsets_.length() - 2;
   220     while (iMax > iMin) {
   221         iMid = iMin + (iMax - iMin) / 2;
   222         if (offset >= lineStartOffsets_[iMid + 1])
   223             iMin = iMid + 1;    // offset is above lineStartOffsets_[iMid]
   224         else
   225             iMax = iMid;        // offset is below or within lineStartOffsets_[iMid]
   226     }
   227     JS_ASSERT(iMax == iMin);
   228     JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
   229     lastLineIndex_ = iMin;
   230     return iMin;
   231 }
   233 uint32_t
   234 TokenStream::SourceCoords::lineNum(uint32_t offset) const
   235 {
   236     uint32_t lineIndex = lineIndexOf(offset);
   237     return lineIndexToNum(lineIndex);
   238 }
   240 uint32_t
   241 TokenStream::SourceCoords::columnIndex(uint32_t offset) const
   242 {
   243     uint32_t lineIndex = lineIndexOf(offset);
   244     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
   245     JS_ASSERT(offset >= lineStartOffset);
   246     return offset - lineStartOffset;
   247 }
   249 void
   250 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum,
   251                                                  uint32_t *columnIndex) const
   252 {
   253     uint32_t lineIndex = lineIndexOf(offset);
   254     *lineNum = lineIndexToNum(lineIndex);
   255     uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
   256     JS_ASSERT(offset >= lineStartOffset);
   257     *columnIndex = offset - lineStartOffset;
   258 }
   260 #ifdef _MSC_VER
   261 #pragma warning(push)
   262 #pragma warning(disable:4351)
   263 #endif
   265 // Initialize members that aren't initialized in |init|.
   266 TokenStream::TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
   267                          const jschar *base, size_t length, StrictModeGetter *smg)
   268   : srcCoords(cx, options.lineno),
   269     options_(options),
   270     tokens(),
   271     cursor(),
   272     lookahead(),
   273     lineno(options.lineno),
   274     flags(),
   275     linebase(base - options.column),
   276     prevLinebase(nullptr),
   277     userbuf(cx, base - options.column, length + options.column), // See comment below
   278     filename(options.filename()),
   279     displayURL_(nullptr),
   280     sourceMapURL_(nullptr),
   281     tokenbuf(cx),
   282     cx(cx),
   283     originPrincipals(options.originPrincipals(cx)),
   284     strictModeGetter(smg)
   285 {
   286     // The caller must ensure that a reference is held on the supplied principals
   287     // throughout compilation.
   288     JS_ASSERT_IF(originPrincipals, originPrincipals->refcount > 0);
   290     // Column numbers are computed as offsets from the current line's base, so the
   291     // initial line's base must be included in the buffer. linebase and userbuf
   292     // were adjusted above, and if we are starting tokenization part way through
   293     // this line then adjust the next character.
   294     userbuf.setAddressOfNextRawChar(base);
   296     // Nb: the following tables could be static, but initializing them here is
   297     // much easier.  Don't worry, the time to initialize them for each
   298     // TokenStream is trivial.  See bug 639420.
   300     // See getChar() for an explanation of maybeEOL[].
   301     memset(maybeEOL, 0, sizeof(maybeEOL));
   302     maybeEOL[unsigned('\n')] = true;
   303     maybeEOL[unsigned('\r')] = true;
   304     maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
   305     maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
   307     // See getTokenInternal() for an explanation of maybeStrSpecial[].
   308     memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
   309     maybeStrSpecial[unsigned('"')] = true;
   310     maybeStrSpecial[unsigned('\'')] = true;
   311     maybeStrSpecial[unsigned('\\')] = true;
   312     maybeStrSpecial[unsigned('\n')] = true;
   313     maybeStrSpecial[unsigned('\r')] = true;
   314     maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
   315     maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
   316     maybeStrSpecial[unsigned(EOF & 0xff)] = true;
   318     // See Parser::assignExpr() for an explanation of isExprEnding[].
   319     memset(isExprEnding, 0, sizeof(isExprEnding));
   320     isExprEnding[TOK_COMMA] = 1;
   321     isExprEnding[TOK_SEMI]  = 1;
   322     isExprEnding[TOK_COLON] = 1;
   323     isExprEnding[TOK_RP]    = 1;
   324     isExprEnding[TOK_RB]    = 1;
   325     isExprEnding[TOK_RC]    = 1;
   326 }
   328 #ifdef _MSC_VER
   329 #pragma warning(pop)
   330 #endif
   332 TokenStream::~TokenStream()
   333 {
   334     js_free(displayURL_);
   335     js_free(sourceMapURL_);
   337     JS_ASSERT_IF(originPrincipals, originPrincipals->refcount);
   338 }
   340 // Use the fastest available getc.
   341 #if defined(HAVE_GETC_UNLOCKED)
   342 # define fast_getc getc_unlocked
   343 #elif defined(HAVE__GETC_NOLOCK)
   344 # define fast_getc _getc_nolock
   345 #else
   346 # define fast_getc getc
   347 #endif
   349 MOZ_ALWAYS_INLINE void
   350 TokenStream::updateLineInfoForEOL()
   351 {
   352     prevLinebase = linebase;
   353     linebase = userbuf.addressOfNextRawChar();
   354     lineno++;
   355     srcCoords.add(lineno, linebase - userbuf.base());
   356 }
   358 MOZ_ALWAYS_INLINE void
   359 TokenStream::updateFlagsForEOL()
   360 {
   361     flags.isDirtyLine = false;
   362 }
   364 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.
   365 int32_t
   366 TokenStream::getChar()
   367 {
   368     int32_t c;
   369     if (MOZ_LIKELY(userbuf.hasRawChars())) {
   370         c = userbuf.getRawChar();
   372         // Normalize the jschar if it was a newline.  We need to detect any of
   373         // these four characters:  '\n' (0x000a), '\r' (0x000d),
   374         // LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029).  Testing for each
   375         // one in turn is slow, so we use a single probabilistic check, and if
   376         // that succeeds, test for them individually.
   377         //
   378         // We use the bottom 8 bits to index into a lookup table, succeeding
   379         // when d&0xff is 0xa, 0xd, 0x28 or 0x29.  Among ASCII chars (which
   380         // are by the far the most common) this gives false positives for '('
   381         // (0x0028) and ')' (0x0029).  We could avoid those by incorporating
   382         // the 13th bit of d into the lookup, but that requires extra shifting
   383         // and masking and isn't worthwhile.  See TokenStream::TokenStream()
   384         // for the initialization of the relevant entries in the table.
   385         if (MOZ_UNLIKELY(maybeEOL[c & 0xff])) {
   386             if (c == '\n')
   387                 goto eol;
   388             if (c == '\r') {
   389                 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
   390                 if (userbuf.hasRawChars())
   391                     userbuf.matchRawChar('\n');
   392                 goto eol;
   393             }
   394             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
   395                 goto eol;
   396         }
   397         return c;
   398     }
   400     flags.isEOF = true;
   401     return EOF;
   403   eol:
   404     updateLineInfoForEOL();
   405     return '\n';
   406 }
   408 // This gets the next char. It does nothing special with EOL sequences, not
   409 // even updating the line counters.  It can be used safely if (a) the
   410 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
   411 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used
   412 // before it's ungotten.
   413 int32_t
   414 TokenStream::getCharIgnoreEOL()
   415 {
   416     if (MOZ_LIKELY(userbuf.hasRawChars()))
   417         return userbuf.getRawChar();
   419     flags.isEOF = true;
   420     return EOF;
   421 }
   423 void
   424 TokenStream::ungetChar(int32_t c)
   425 {
   426     if (c == EOF)
   427         return;
   428     JS_ASSERT(!userbuf.atStart());
   429     userbuf.ungetRawChar();
   430     if (c == '\n') {
   431 #ifdef DEBUG
   432         int32_t c2 = userbuf.peekRawChar();
   433         JS_ASSERT(TokenBuf::isRawEOLChar(c2));
   434 #endif
   436         // If it's a \r\n sequence, also unget the \r.
   437         if (!userbuf.atStart())
   438             userbuf.matchRawCharBackwards('\r');
   440         JS_ASSERT(prevLinebase);    // we should never get more than one EOL char
   441         linebase = prevLinebase;
   442         prevLinebase = nullptr;
   443         lineno--;
   444     } else {
   445         JS_ASSERT(userbuf.peekRawChar() == c);
   446     }
   447 }
   449 void
   450 TokenStream::ungetCharIgnoreEOL(int32_t c)
   451 {
   452     if (c == EOF)
   453         return;
   454     JS_ASSERT(!userbuf.atStart());
   455     userbuf.ungetRawChar();
   456 }
   458 // Return true iff |n| raw characters can be read from this without reading past
   459 // EOF or a newline, and copy those characters into |cp| if so.  The characters
   460 // are not consumed: use skipChars(n) to do so after checking that the consumed
   461 // characters had appropriate values.
   462 bool
   463 TokenStream::peekChars(int n, jschar *cp)
   464 {
   465     int i, j;
   466     int32_t c;
   468     for (i = 0; i < n; i++) {
   469         c = getCharIgnoreEOL();
   470         if (c == EOF)
   471             break;
   472         if (c == '\n') {
   473             ungetCharIgnoreEOL(c);
   474             break;
   475         }
   476         cp[i] = jschar(c);
   477     }
   478     for (j = i - 1; j >= 0; j--)
   479         ungetCharIgnoreEOL(cp[j]);
   480     return i == n;
   481 }
   483 const jschar *
   484 TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max)
   485 {
   486     JS_ASSERT(base_ <= p && p <= limit_);
   488     size_t n = 0;
   489     while (true) {
   490         if (p >= limit_)
   491             break;
   492         if (n >= max)
   493             break;
   494         if (TokenBuf::isRawEOLChar(*p++))
   495             break;
   496         n++;
   497     }
   498     return p;
   499 }
   501 void
   502 TokenStream::advance(size_t position)
   503 {
   504     const jschar *end = userbuf.base() + position;
   505     while (userbuf.addressOfNextRawChar() < end)
   506         getChar();
   508     Token *cur = &tokens[cursor];
   509     cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base();
   510     cur->type = TOK_ERROR;
   511     lookahead = 0;
   512 }
   514 void
   515 TokenStream::tell(Position *pos)
   516 {
   517     pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
   518     pos->flags = flags;
   519     pos->lineno = lineno;
   520     pos->linebase = linebase;
   521     pos->prevLinebase = prevLinebase;
   522     pos->lookahead = lookahead;
   523     pos->currentToken = currentToken();
   524     for (unsigned i = 0; i < lookahead; i++)
   525         pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];
   526 }
   528 void
   529 TokenStream::seek(const Position &pos)
   530 {
   531     userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
   532     flags = pos.flags;
   533     lineno = pos.lineno;
   534     linebase = pos.linebase;
   535     prevLinebase = pos.prevLinebase;
   536     lookahead = pos.lookahead;
   538     tokens[cursor] = pos.currentToken;
   539     for (unsigned i = 0; i < lookahead; i++)
   540         tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];
   541 }
   543 bool
   544 TokenStream::seek(const Position &pos, const TokenStream &other)
   545 {
   546     if (!srcCoords.fill(other.srcCoords))
   547         return false;
   548     seek(pos);
   549     return true;
   550 }
   552 bool
   553 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
   554                                            va_list args)
   555 {
   556     // In strict mode code, this is an error, not merely a warning.
   557     unsigned flags = JSREPORT_STRICT;
   558     if (strictMode)
   559         flags |= JSREPORT_ERROR;
   560     else if (options().extraWarningsOption)
   561         flags |= JSREPORT_WARNING;
   562     else
   563         return true;
   565     return reportCompileErrorNumberVA(offset, flags, errorNumber, args);
   566 }
   568 void
   569 CompileError::throwError(JSContext *cx)
   570 {
   571     // If there's a runtime exception type associated with this error
   572     // number, set that as the pending exception.  For errors occuring at
   573     // compile time, this is very likely to be a JSEXN_SYNTAXERR.
   574     //
   575     // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
   576     // flag will be set in report.flags.  Proper behavior for an error
   577     // reporter is to ignore a report with this flag for all but top-level
   578     // compilation errors.  The exception will remain pending, and so long
   579     // as the non-top-level "load", "eval", or "compile" native function
   580     // returns false, the top-level reporter will eventually receive the
   581     // uncaught exception report.
   582     if (!js_ErrorToException(cx, message, &report, nullptr, nullptr))
   583         CallErrorReporter(cx, message, &report);
   584 }
   586 CompileError::~CompileError()
   587 {
   588     js_free((void*)report.uclinebuf);
   589     js_free((void*)report.linebuf);
   590     js_free((void*)report.ucmessage);
   591     js_free(message);
   592     message = nullptr;
   594     if (report.messageArgs) {
   595         if (argumentsType == ArgumentsAreASCII) {
   596             unsigned i = 0;
   597             while (report.messageArgs[i])
   598                 js_free((void*)report.messageArgs[i++]);
   599         }
   600         js_free(report.messageArgs);
   601     }
   603     PodZero(&report);
   604 }
   606 bool
   607 TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
   608                                         va_list args)
   609 {
   610     bool warning = JSREPORT_IS_WARNING(flags);
   612     if (warning && options().werrorOption) {
   613         flags &= ~JSREPORT_WARNING;
   614         warning = false;
   615     }
   617     // On the main thread, report the error immediately. When compiling off
   618     // thread, save the error so that the main thread can report it later.
   619     CompileError tempErr;
   620     CompileError &err = cx->isJSContext() ? tempErr : cx->addPendingCompileError();
   622     err.report.flags = flags;
   623     err.report.errorNumber = errorNumber;
   624     err.report.filename = filename;
   625     err.report.originPrincipals = originPrincipals;
   626     if (offset == NoOffset) {
   627         err.report.lineno = 0;
   628         err.report.column = 0;
   629     } else {
   630         err.report.lineno = srcCoords.lineNum(offset);
   631         err.report.column = srcCoords.columnIndex(offset);
   632     }
   634     err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;
   636     if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, nullptr, errorNumber, &err.message,
   637                                  &err.report, err.argumentsType, args))
   638     {
   639         return false;
   640     }
   642     // Given a token, T, that we want to complain about: if T's (starting)
   643     // lineno doesn't match TokenStream's lineno, that means we've scanned past
   644     // the line that T starts on, which makes it hard to print some or all of
   645     // T's (starting) line for context.
   646     //
   647     // So we don't even try, leaving report.linebuf and friends zeroed.  This
   648     // means that any error involving a multi-line token (e.g. an unterminated
   649     // multi-line string literal) won't have a context printed.
   650     if (offset != NoOffset && err.report.lineno == lineno) {
   651         const jschar *tokenStart = userbuf.base() + offset;
   653         // We show only a portion (a "window") of the line around the erroneous
   654         // token -- the first char in the token, plus |windowRadius| chars
   655         // before it and |windowRadius - 1| chars after it.  This is because
   656         // lines can be very long and printing the whole line is (a) not that
   657         // helpful, and (b) can waste a lot of memory.  See bug 634444.
   658         static const size_t windowRadius = 60;
   660         // Truncate at the front if necessary.
   661         const jschar *windowBase = (linebase + windowRadius < tokenStart)
   662                                  ? tokenStart - windowRadius
   663                                  : linebase;
   664         uint32_t windowOffset = tokenStart - windowBase;
   666         // Find EOL, or truncate at the back if necessary.
   667         const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius);
   668         size_t windowLength = windowLimit - windowBase;
   669         JS_ASSERT(windowLength <= windowRadius * 2);
   671         // Create the windowed strings.
   672         StringBuffer windowBuf(cx);
   673         if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0))
   674             return false;
   676         // Unicode and char versions of the window into the offending source
   677         // line, without final \n.
   678         err.report.uclinebuf = windowBuf.extractWellSized();
   679         if (!err.report.uclinebuf)
   680             return false;
   681         TwoByteChars tbchars(err.report.uclinebuf, windowLength);
   682         err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str();
   683         if (!err.report.linebuf)
   684             return false;
   686         err.report.tokenptr = err.report.linebuf + windowOffset;
   687         err.report.uctokenptr = err.report.uclinebuf + windowOffset;
   688     }
   690     if (cx->isJSContext())
   691         err.throwError(cx->asJSContext());
   693     return warning;
   694 }
   696 bool
   697 TokenStream::reportStrictModeError(unsigned errorNumber, ...)
   698 {
   699     va_list args;
   700     va_start(args, errorNumber);
   701     bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
   702                                                 errorNumber, args);
   703     va_end(args);
   704     return result;
   705 }
   707 bool
   708 TokenStream::reportError(unsigned errorNumber, ...)
   709 {
   710     va_list args;
   711     va_start(args, errorNumber);
   712     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
   713                                              args);
   714     va_end(args);
   715     return result;
   716 }
   718 bool
   719 TokenStream::reportWarning(unsigned errorNumber, ...)
   720 {
   721     va_list args;
   722     va_start(args, errorNumber);
   723     bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
   724                                              errorNumber, args);
   725     va_end(args);
   726     return result;
   727 }
   729 bool
   730 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
   731 {
   732     if (!options().extraWarningsOption)
   733         return true;
   735     return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
   736 }
   738 void
   739 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
   740 {
   741     va_list args;
   742     va_start(args, errorNumber);
   743     reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, args);
   744     va_end(args);
   745 }
   747 // We have encountered a '\': check for a Unicode escape sequence after it.
   748 // Return 'true' and the character code value (by value) if we found a
   749 // Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
   750 // advance along the buffer.
   751 bool
   752 TokenStream::peekUnicodeEscape(int *result)
   753 {
   754     jschar cp[5];
   756     if (peekChars(5, cp) && cp[0] == 'u' &&
   757         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
   758         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
   759     {
   760         *result = (((((JS7_UNHEX(cp[1]) << 4)
   761                 + JS7_UNHEX(cp[2])) << 4)
   762               + JS7_UNHEX(cp[3])) << 4)
   763             + JS7_UNHEX(cp[4]);
   764         return true;
   765     }
   766     return false;
   767 }
   769 bool
   770 TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
   771 {
   772     if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
   773         skipChars(5);
   774         return true;
   775     }
   776     return false;
   777 }
   779 bool
   780 TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
   781 {
   782     if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
   783         skipChars(5);
   784         return true;
   785     }
   786     return false;
   787 }
   789 // Helper function which returns true if the first length(q) characters in p are
   790 // the same as the characters in q.
   791 static bool
   792 CharsMatch(const jschar *p, const char *q) {
   793     while (*q) {
   794         if (*p++ != *q++)
   795             return false;
   796     }
   797     return true;
   798 }
   800 bool
   801 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
   802 {
   803     // Match directive comments used in debugging, such as "//# sourceURL" and
   804     // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
   805     //
   806     // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
   807     // line comments containing a source mapping URL inside a multiline
   808     // comment. To avoid potentially expensive lookahead and backtracking, we
   809     // only check for this case if we encounter a '#' character.
   811     if (!getDisplayURL(isMultiline, shouldWarnDeprecated))
   812         return false;
   813     if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))
   814         return false;
   816     return true;
   817 }
   819 bool
   820 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
   821                           const char *directive, int directiveLength,
   822                           const char *errorMsgPragma, jschar **destination) {
   823     JS_ASSERT(directiveLength <= 18);
   824     jschar peeked[18];
   825     int32_t c;
   827     if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
   828         if (shouldWarnDeprecated &&
   829             !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
   830             return false;
   832         skipChars(directiveLength);
   833         tokenbuf.clear();
   835         while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
   836             getChar();
   837             // Debugging directives can occur in both single- and multi-line
   838             // comments. If we're currently inside a multi-line comment, we also
   839             // need to recognize multi-line comment terminators.
   840             if (isMultiline && c == '*' && peekChar() == '/') {
   841                 ungetChar('*');
   842                 break;
   843             }
   844             tokenbuf.append(c);
   845         }
   847         if (tokenbuf.empty())
   848             // The directive's URL was missing, but this is not quite an
   849             // exception that we should stop and drop everything for.
   850             return true;
   852         size_t length = tokenbuf.length();
   854         js_free(*destination);
   855         *destination = cx->pod_malloc<jschar>(length + 1);
   856         if (!*destination)
   857             return false;
   859         PodCopy(*destination, tokenbuf.begin(), length);
   860         (*destination)[length] = '\0';
   861     }
   863     return true;
   864 }
   866 bool
   867 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
   868 {
   869     // Match comments of the form "//# sourceURL=<url>" or
   870     // "/\* //# sourceURL=<url> *\/"
   871     //
   872     // Note that while these are labeled "sourceURL" in the source text,
   873     // internally we refer to it as a "displayURL" to distinguish what the
   874     // developer would like to refer to the source as from the source's actual
   875     // URL.
   877     return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,
   878                         "sourceURL", &displayURL_);
   879 }
   881 bool
   882 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
   883 {
   884     // Match comments of the form "//# sourceMappingURL=<url>" or
   885     // "/\* //# sourceMappingURL=<url> *\/"
   887     return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,
   888                         "sourceMappingURL", &sourceMapURL_);
   889 }
   891 MOZ_ALWAYS_INLINE Token *
   892 TokenStream::newToken(ptrdiff_t adjust)
   893 {
   894     cursor = (cursor + 1) & ntokensMask;
   895     Token *tp = &tokens[cursor];
   896     tp->pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base();
   898     // NOTE: tp->pos.end is not set until the very end of getTokenInternal().
   899     MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));
   901     return tp;
   902 }
   904 MOZ_ALWAYS_INLINE JSAtom *
   905 TokenStream::atomize(ExclusiveContext *cx, CharBuffer &cb)
   906 {
   907     return AtomizeChars(cx, cb.begin(), cb.length());
   908 }
   #ifdef DEBUG
   // Debug-only consistency check for a finished token: its kind must be a
   // real token kind and its position range must not be inverted.
   static bool
   IsTokenSane(Token *tp)
   {
       // Nb: TOK_EOL is never stored in an actual Token; it is only handed
       // out as a TokenKind by peekTokenSameLine().
       const bool kindInRange = tp->type >= TOK_ERROR && tp->type < TOK_LIMIT;
       if (!kindInRange || tp->type == TOK_EOL)
           return false;

       // A token cannot end before it begins.
       return tp->pos.begin <= tp->pos.end;
   }
   #endif
   926 bool
   927 TokenStream::putIdentInTokenbuf(const jschar *identStart)
   928 {
   929     int32_t c, qc;
   930     const jschar *tmp = userbuf.addressOfNextRawChar();
   931     userbuf.setAddressOfNextRawChar(identStart);
   933     tokenbuf.clear();
   934     for (;;) {
   935         c = getCharIgnoreEOL();
   936         if (!IsIdentifierPart(c)) {
   937             if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
   938                 break;
   939             c = qc;
   940         }
   941         if (!tokenbuf.append(c)) {
   942             userbuf.setAddressOfNextRawChar(tmp);
   943             return false;
   944         }
   945     }
   946     userbuf.setAddressOfNextRawChar(tmp);
   947     return true;
   948 }
   950 bool
   951 TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp)
   952 {
   953     const KeywordInfo *kw = FindKeyword(s, length);
   954     if (!kw)
   955         return true;
   957     if (kw->tokentype == TOK_RESERVED)
   958         return reportError(JSMSG_RESERVED_ID, kw->chars);
   960     if (kw->tokentype != TOK_STRICT_RESERVED) {
   961         if (kw->version <= versionNumber()) {
   962             // Working keyword.
   963             if (ttp) {
   964                 *ttp = kw->tokentype;
   965                 return true;
   966             }
   967             return reportError(JSMSG_RESERVED_ID, kw->chars);
   968         }
   970         // The keyword is not in this version. Treat it as an identifier, unless
   971         // it is let which we treat as TOK_STRICT_RESERVED by falling through to
   972         // the code below (ES5 forbids it in strict mode).
   973         if (kw->tokentype != TOK_LET)
   974             return true;
   975     }
   977     // Strict reserved word.
   978     return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
   979 }
   981 enum FirstCharKind {
           // Classification of a character by the kind of token it can begin.
           // getTokenInternal() looks a char below 128 up in firstCharKinds[]
           // and dispatches on the resulting value.
           //
   982     // A jschar has the 'OneChar' kind if it, by itself, constitutes a valid
   983     // token that cannot also be a prefix of a longer token.  E.g. ';' has the
   984     // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
   985     // that begin with '+'.
   986     //
   987     // The few token kinds satisfying these properties cover roughly 35--45%
   988     // of the tokens seen in practice.
   989     //
   990     // We represent the 'OneChar' kind with any positive value less than
   991     // TOK_LIMIT.  This representation lets us associate each one-char token
   992     // jschar with a TokenKind and thus avoid a subsequent jschar-to-TokenKind
   993     // conversion.
   994     OneChar_Min = 0,
   995     OneChar_Max = TOK_LIMIT - 1,
           // The remaining kinds are numbered from TOK_LIMIT upwards, so they
           // can never collide with a value in the OneChar range above.
   997     Space = TOK_LIMIT,  // non-EOL whitespace
   998     Ident,              // can start an identifier
   999     Dec,                // '1'..'9': starts a decimal number
  1000     String,             // a quote char: starts a string literal
  1001     EOL,                // '\n' or '\r'
  1002     BasePrefix,         // '0': may introduce a hex, octal or binary number
  1003     Other,              // everything else; handled by a switch on the char
           // Largest value; static_assert'ed to fit a firstCharKinds[] element.
  1005     LastCharKind = Other
  1006 };
  1008 // OneChar: 40,  41,  44,  58,  59,  63,  91,  93,  123, 125, 126:
  1009 //          '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
  1010 // Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
  1011 // Dot:     46: '.'
  1012 // Equals:  61: '='
  1013 // String:  34, 39: '"', '\''
  1014 // Dec:     49..57: '1'..'9'
  1015 // Plus:    43: '+'
  1016 // BasePrefix:  48: '0'
  1017 // Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
  1018 // EOL:     10, 13: '\n', '\r'
  1019 //
       // NOTE: "Dot", "Equals" and "Plus" in the legend above are not
       // FirstCharKind values.  In the table below '.', '=' and '+' are
       // classified as Other, like every other entry spelled _______.
       //
       // T_COMMA, T_COLON, T_BITNOT and _______ are short aliases used only
       // inside the table; they are #undef'ed right after it.
  1020 #define T_COMMA     TOK_COMMA
  1021 #define T_COLON     TOK_COLON
  1022 #define T_BITNOT    TOK_BITNOT
  1023 #define _______ Other
       // One entry per character code 0..127 (row label + column header give
       // the code).  An entry is either a FirstCharKind at or above TOK_LIMIT
       // or, for OneChar characters, the TokenKind of the one-char token
       // itself.
  1024 static const uint8_t firstCharKinds[] = {
  1025 /*         0        1        2        3        4        5        6        7        8        9    */
  1026 /*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
  1027 /*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
  1028 /*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
  1029 /*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
  1030 /*  40+ */  TOK_LP,  TOK_RP, _______, _______, T_COMMA,_______,  _______, _______,BasePrefix,  Dec,
  1031 /*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,    Dec,  T_COLON,TOK_SEMI,
  1032 /*  60+ */ _______, _______, _______,TOK_HOOK, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
  1033 /*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1034 /*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1035 /*  90+ */   Ident,  TOK_LB, _______,  TOK_RB, _______,   Ident, _______,   Ident,   Ident,   Ident,
  1036 /* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1037 /* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1038 /* 120+ */   Ident,   Ident,   Ident,  TOK_LC, _______,  TOK_RC,T_BITNOT, _______
  1039 };
  1040 #undef T_COMMA
  1041 #undef T_COLON
  1042 #undef T_BITNOT
  1043 #undef _______
       // Every FirstCharKind value has to be representable in one table element.
  1045 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
  1046               "Elements of firstCharKinds[] are too small");
  1048 TokenKind
  1049 TokenStream::getTokenInternal(Modifier modifier)
           // Scan the next token from userbuf into a slot obtained from
           // newToken() and return its kind.
           //
           // Whitespace, line terminators and comments produce no token: those
           // paths jump back to |retry|.  Every successful path jumps to |out|,
           // which records tp->pos.end; every failure jumps to |error|, which
           // turns the token into TOK_ERROR and calls onError().
           //
           // |modifier| == KeywordIsName skips the keyword check for
           // identifiers; |modifier| == Operand lets a '/' start a regular
           // expression literal instead of a division operator.
           //
           // NOTE(review): in this listing the brace-only lines of the function
           // are missing (the line numbers skip them); rely on the indentation
           // for the nesting and restore them before compiling.
  1051     int c, qc;
  1052     Token *tp;
  1053     FirstCharKind c1kind;
  1054     const jschar *numStart;
  1055     bool hasExp;
  1056     DecimalPoint decimalPoint;
  1057     const jschar *identStart;
  1058     bool hadUnicodeEscape;
  1060   retry:
  1061     if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
  1062         tp = newToken(0);
  1063         tp->type = TOK_EOF;
  1064         flags.isEOF = true;
  1065         goto out;
  1068     c = userbuf.getRawChar();
  1069     JS_ASSERT(c != EOF);
  1071     // Chars not in the range 0..127 are rare.  Getting them out of the way
  1072     // early allows subsequent checking to be faster.
  1073     if (MOZ_UNLIKELY(c >= 128)) {
  1074         if (IsSpaceOrBOM2(c)) {
  1075             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
  1076                 updateLineInfoForEOL();
  1077                 updateFlagsForEOL();
  1080             goto retry;
  1083         tp = newToken(-1);
  1085         // '$' and '_' don't pass IsLetter, but they're < 128 so never appear here.
  1086         JS_STATIC_ASSERT('$' < 128 && '_' < 128);
  1087         if (IsLetter(c)) {
  1088             identStart = userbuf.addressOfNextRawChar() - 1;
  1089             hadUnicodeEscape = false;
  1090             goto identifier;
  1093         goto badchar;
  1096     // Get the token kind, based on the first char.  The ordering of c1kind
  1097     // comparison is based on the frequency of tokens in real code -- Parsemark
  1098     // (which represents typical JS code on the web) and the Unreal demo (which
  1099     // represents asm.js code).
  1100     //
  1101     //                  Parsemark   Unreal
  1102     //  OneChar         32.9%       39.7%
  1103     //  Space           25.0%        0.6%
  1104     //  Ident           19.2%       36.4%
  1105     //  Dec              7.2%        5.1%
  1106     //  String           7.9%        0.0%
  1107     //  EOL              1.7%        0.0%
  1108     //  BasePrefix       0.4%        4.9%
  1109     //  Other            5.7%       13.3%
  1110     //
  1111     // The ordering is based mostly on Parsemark frequencies, with Unreal
  1112     // frequencies used to break close categories (e.g. |Dec| and |String|).
  1113     // |Other| is biggish, but no other token kind is common enough for it to
  1114     // be worth adding extra values to FirstCharKind.
  1115     //
  1116     c1kind = FirstCharKind(firstCharKinds[c]);
  1118     // Look for an unambiguous single-char token.
  1119     //
  1120     if (c1kind <= OneChar_Max) {
  1121         tp = newToken(-1);
  1122         tp->type = TokenKind(c1kind);
  1123         goto out;
  1126     // Skip over non-EOL whitespace chars.
  1127     //
  1128     if (c1kind == Space)
  1129         goto retry;
  1131     // Look for an identifier.
  1132     //
  1133     if (c1kind == Ident) {
  1134         tp = newToken(-1);
  1135         identStart = userbuf.addressOfNextRawChar() - 1;
  1136         hadUnicodeEscape = false;
  1138       identifier:
  1139         for (;;) {
  1140             c = getCharIgnoreEOL();
  1141             if (c == EOF)
  1142                 break;
  1143             if (!IsIdentifierPart(c)) {
  1144                 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
  1145                     break;
  1146                 hadUnicodeEscape = true;
  1149         ungetCharIgnoreEOL(c);
  1151         // Identifiers containing no Unicode escapes can be processed directly
  1152         // from userbuf.  The rest must use the escapes converted via tokenbuf
  1153         // before atomizing.
  1154         const jschar *chars;
  1155         size_t length;
  1156         if (hadUnicodeEscape) {
  1157             if (!putIdentInTokenbuf(identStart))
  1158                 goto error;
  1160             chars = tokenbuf.begin();
  1161             length = tokenbuf.length();
  1162         } else {
  1163             chars = identStart;
  1164             length = userbuf.addressOfNextRawChar() - identStart;
  1167         // Check for keywords unless the parser told us not to.
  1168         if (modifier != KeywordIsName) {
  1169             tp->type = TOK_NAME;
  1170             if (!checkForKeyword(chars, length, &tp->type))
  1171                 goto error;
  1172             if (tp->type != TOK_NAME)
  1173                 goto out;
  1176         JSAtom *atom = AtomizeChars(cx, chars, length);
  1177         if (!atom)
  1178             goto error;
  1179         tp->type = TOK_NAME;
  1180         tp->setName(atom->asPropertyName());
  1181         goto out;
  1184     // Look for a decimal number.
  1185     //
  1186     if (c1kind == Dec) {
  1187         tp = newToken(-1);
  1188         numStart = userbuf.addressOfNextRawChar() - 1;
  1190       decimal:
  1191         decimalPoint = NoDecimal;
  1192         hasExp = false;
  1193         while (JS7_ISDEC(c))
  1194             c = getCharIgnoreEOL();
  1196         if (c == '.') {
  1197             decimalPoint = HasDecimal;
  1198           decimal_dot:
  1199             do {
  1200                 c = getCharIgnoreEOL();
  1201             } while (JS7_ISDEC(c));
  1203         if (c == 'e' || c == 'E') {
  1204             hasExp = true;
  1205             c = getCharIgnoreEOL();
  1206             if (c == '+' || c == '-')
  1207                 c = getCharIgnoreEOL();
  1208             if (!JS7_ISDEC(c)) {
  1209                 ungetCharIgnoreEOL(c);
  1210                 reportError(JSMSG_MISSING_EXPONENT);
  1211                 goto error;
  1213             do {
  1214                 c = getCharIgnoreEOL();
  1215             } while (JS7_ISDEC(c));
  1217         ungetCharIgnoreEOL(c);
  1219         if (c != EOF && IsIdentifierStart(c)) {
  1220             reportError(JSMSG_IDSTART_AFTER_NUMBER);
  1221             goto error;
  1224         // Unlike identifiers and strings, numbers cannot contain escaped
  1225         // chars, so we don't need to use tokenbuf.  Instead we can just
  1226         // convert the jschars in userbuf directly to the numeric value.
  1227         double dval;
  1228         if (!((decimalPoint == HasDecimal) || hasExp)) {
  1229             if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
  1230                 goto error;
  1231         } else {
  1232             const jschar *dummy;
  1233             if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
  1234                 goto error;
  1236         tp->type = TOK_NUMBER;
  1237         tp->setNumber(dval, decimalPoint);
  1238         goto out;
  1241     // Look for a string.
  1242     //
  1243     if (c1kind == String) {
  1244         tp = newToken(-1);
  1245         qc = c;
  1246         tokenbuf.clear();
  1247         while (true) {
  1248             // We need to detect any of these chars:  " or ', \n (or its
  1249             // equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
  1250             // similar to maybeEOL[], see above.  Because we detect EOL
  1251             // sequences here and put them back immediately, we can use
  1252             // getCharIgnoreEOL().
  1253             c = getCharIgnoreEOL();
  1254             if (maybeStrSpecial[c & 0xff]) {
  1255                 if (c == qc)
  1256                     break;
  1257                 if (c == '\\') {
  1258                     switch (c = getChar()) {
  1259                       case 'b': c = '\b'; break;
  1260                       case 'f': c = '\f'; break;
  1261                       case 'n': c = '\n'; break;
  1262                       case 'r': c = '\r'; break;
  1263                       case 't': c = '\t'; break;
  1264                       case 'v': c = '\v'; break;
  1266                       default:
  1267                         if ('0' <= c && c < '8') {
  1268                             int32_t val = JS7_UNDEC(c);
  1270                             c = peekChar();
  1271                             // Strict mode code allows only \0, then a non-digit.
  1272                             if (val != 0 || JS7_ISDEC(c)) {
  1273                                 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
  1274                                     goto error;
  1275                                 flags.sawOctalEscape = true;
  1277                             if ('0' <= c && c < '8') {
  1278                                 val = 8 * val + JS7_UNDEC(c);
  1279                                 getChar();
  1280                                 c = peekChar();
  1281                                 if ('0' <= c && c < '8') {
  1282                                     int32_t save = val;
  1283                                     val = 8 * val + JS7_UNDEC(c);
                                           // A third octal digit is only consumed
                                           // if the value stays within 0377.
  1284                                     if (val <= 0377)
  1285                                         getChar();
  1286                                     else
  1287                                         val = save;
  1291                             c = jschar(val);
  1292                         } else if (c == 'u') {
  1293                             jschar cp[4];
  1294                             if (peekChars(4, cp) &&
  1295                                 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
  1296                                 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
  1297                                 c = (((((JS7_UNHEX(cp[0]) << 4)
  1298                                         + JS7_UNHEX(cp[1])) << 4)
  1299                                       + JS7_UNHEX(cp[2])) << 4)
  1300                                     + JS7_UNHEX(cp[3]);
  1301                                 skipChars(4);
  1302                             } else {
  1303                                 reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
  1304                                 goto error;
  1306                         } else if (c == 'x') {
  1307                             jschar cp[2];
  1308                             if (peekChars(2, cp) &&
  1309                                 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
  1310                                 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
  1311                                 skipChars(2);
  1312                             } else {
  1313                                 reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
  1314                                 goto error;
  1316                         } else if (c == '\n') {
  1317                             // ES5 7.8.4: an escaped line terminator represents
  1318                             // no character.
  1319                             continue;
  1321                         break;
  1323                 } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
  1324                     ungetCharIgnoreEOL(c);
  1325                     reportError(JSMSG_UNTERMINATED_STRING);
  1326                     goto error;
  1329             if (!tokenbuf.append(c))
  1330                 goto error;
  1332         JSAtom *atom = atomize(cx, tokenbuf);
  1333         if (!atom)
  1334             goto error;
  1335         tp->type = TOK_STRING;
  1336         tp->setAtom(atom);
  1337         goto out;
  1340     // Skip over EOL chars, updating line state along the way.
  1341     //
  1342     if (c1kind == EOL) {
  1343         // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
  1344         if (c == '\r' && userbuf.hasRawChars())
  1345             userbuf.matchRawChar('\n');
  1346         updateLineInfoForEOL();
  1347         updateFlagsForEOL();
  1348         goto retry;
  1351     // Look for a hexadecimal, octal, or binary number.
  1352     //
  1353     if (c1kind == BasePrefix) {
  1354         tp = newToken(-1);
  1355         int radix;
  1356         c = getCharIgnoreEOL();
  1357         if (c == 'x' || c == 'X') {
  1358             radix = 16;
  1359             c = getCharIgnoreEOL();
  1360             if (!JS7_ISHEX(c)) {
  1361                 ungetCharIgnoreEOL(c);
  1362                 reportError(JSMSG_MISSING_HEXDIGITS);
  1363                 goto error;
  1365             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0x'
  1366             while (JS7_ISHEX(c))
  1367                 c = getCharIgnoreEOL();
  1368         } else if (c == 'b' || c == 'B') {
  1369             radix = 2;
  1370             c = getCharIgnoreEOL();
  1371             if (c != '0' && c != '1') {
  1372                 ungetCharIgnoreEOL(c);
  1373                 reportError(JSMSG_MISSING_BINARY_DIGITS);
  1374                 goto error;
  1376             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0b'
  1377             while (c == '0' || c == '1')
  1378                 c = getCharIgnoreEOL();
  1379         } else if (c == 'o' || c == 'O') {
  1380             radix = 8;
  1381             c = getCharIgnoreEOL();
  1382             if (c < '0' || c > '7') {
  1383                 ungetCharIgnoreEOL(c);
  1384                 reportError(JSMSG_MISSING_OCTAL_DIGITS);
  1385                 goto error;
  1387             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0o'
  1388             while ('0' <= c && c <= '7')
  1389                 c = getCharIgnoreEOL();
  1390         } else if (JS7_ISDEC(c)) {
  1391             radix = 8;
  1392             numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0'
  1393             while (JS7_ISDEC(c)) {
  1394                 // Octal integer literals are not permitted in strict mode code.
  1395                 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
  1396                     goto error;
  1398                 // Outside strict mode, we permit 08 and 09 as decimal numbers,
  1399                 // which makes our behaviour a superset of the ECMA numeric
  1400                 // grammar. We might not always be so permissive, so we warn
  1401                 // about it.
  1402                 if (c >= '8') {
  1403                     if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
  1404                         goto error;
  1406                     goto decimal;   // use the decimal scanner for the rest of the number
  1408                 c = getCharIgnoreEOL();
  1410         } else {
  1411             // '0' not followed by 'x', 'X', 'b', 'B', 'o', 'O' or a digit;  scan as a decimal number.
  1412             numStart = userbuf.addressOfNextRawChar() - 1;
  1413             goto decimal;
  1415         ungetCharIgnoreEOL(c);
  1417         if (c != EOF && IsIdentifierStart(c)) {
  1418             reportError(JSMSG_IDSTART_AFTER_NUMBER);
  1419             goto error;
  1422         double dval;
  1423         const jschar *dummy;
  1424         if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
  1425             goto error;
  1426         tp->type = TOK_NUMBER;
  1427         tp->setNumber(dval, NoDecimal);
  1428         goto out;
  1431     // This handles everything else.
  1432     //
  1433     JS_ASSERT(c1kind == Other);
  1434     tp = newToken(-1);
  1435     switch (c) {
  1436       case '.':
  1437         c = getCharIgnoreEOL();
  1438         if (JS7_ISDEC(c)) {
                   // Two chars have been consumed so far, the '.' and the digit;
                   // the number starts at the '.'.
  1439             numStart = userbuf.addressOfNextRawChar() - 2;
  1440             decimalPoint = HasDecimal;
  1441             hasExp = false;
  1442             goto decimal_dot;
  1444         if (c == '.') {
  1445             if (matchChar('.')) {
  1446                 tp->type = TOK_TRIPLEDOT;
  1447                 goto out;
  1450         ungetCharIgnoreEOL(c);
  1451         tp->type = TOK_DOT;
  1452         goto out;
  1454       case '=':
  1455         if (matchChar('='))
  1456             tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
  1457         else if (matchChar('>'))
  1458             tp->type = TOK_ARROW;
  1459         else
  1460             tp->type = TOK_ASSIGN;
  1461         goto out;
  1463       case '+':
  1464         if (matchChar('+'))
  1465             tp->type = TOK_INC;
  1466         else
  1467             tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
  1468         goto out;
  1470       case '\\':
  1471         hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
  1472         if (hadUnicodeEscape) {
                   // NOTE(review): 6 is presumably the length of a "\uXXXX"
                   // escape -- confirm against matchUnicodeEscapeIdStart().
  1473             identStart = userbuf.addressOfNextRawChar() - 6;
  1474             goto identifier;
  1476         goto badchar;
  1478       case '|':
  1479         if (matchChar('|'))
  1480             tp->type = TOK_OR;
  1481         else
  1482             tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
  1483         goto out;
  1485       case '^':
  1486         tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
  1487         goto out;
  1489       case '&':
  1490         if (matchChar('&'))
  1491             tp->type = TOK_AND;
  1492         else
  1493             tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
  1494         goto out;
  1496       case '!':
  1497         if (matchChar('='))
  1498             tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
  1499         else
  1500             tp->type = TOK_NOT;
  1501         goto out;
  1503       case '<':
  1504         // NB: treat HTML begin-comment as comment-till-end-of-line.
  1505         if (matchChar('!')) {
  1506             if (matchChar('-')) {
  1507                 if (matchChar('-'))
  1508                     goto skipline;
  1509                 ungetChar('-');
  1511             ungetChar('!');
  1513         if (matchChar('<')) {
  1514             tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
  1515         } else {
  1516             tp->type = matchChar('=') ? TOK_LE : TOK_LT;
  1518         goto out;
  1520       case '>':
  1521         if (matchChar('>')) {
  1522             if (matchChar('>'))
  1523                 tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
  1524             else
  1525                 tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
  1526         } else {
  1527             tp->type = matchChar('=') ? TOK_GE : TOK_GT;
  1529         goto out;
  1531       case '*':
  1532         tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
  1533         goto out;
  1535       case '/':
  1536         // Look for a single-line comment.
  1537         if (matchChar('/')) {
  1538             c = peekChar();
  1539             if (c == '@' || c == '#') {
  1540                 bool shouldWarn = getChar() == '@';
  1541                 if (!getDirectives(false, shouldWarn))
  1542                     goto error;
                   // Shared tail for everything treated as a comment running to
                   // the end of the line: "//", "<!--" and a leading "-->".
  1545         skipline:
  1546             while ((c = getChar()) != EOF && c != '\n')
  1547                 continue;
  1548             ungetChar(c);
                   // A comment yields no token: give back the slot that
                   // newToken() claimed before scanning again.
  1549             cursor = (cursor - 1) & ntokensMask;
  1550             goto retry;
  1553         // Look for a multi-line comment.
  1554         if (matchChar('*')) {
  1555             unsigned linenoBefore = lineno;
  1556             while ((c = getChar()) != EOF &&
  1557                    !(c == '*' && matchChar('/'))) {
  1558                 if (c == '@' || c == '#') {
  1559                     bool shouldWarn = c == '@';
  1560                     if (!getDirectives(true, shouldWarn))
  1561                         goto error;
  1564             if (c == EOF) {
  1565                 reportError(JSMSG_UNTERMINATED_COMMENT);
  1566                 goto error;
  1568             if (linenoBefore != lineno)
  1569                 updateFlagsForEOL();
                   // As for single-line comments: release the unused token slot.
  1570             cursor = (cursor - 1) & ntokensMask;
  1571             goto retry;
  1574         // Look for a regexp.
  1575         if (modifier == Operand) {
  1576             tokenbuf.clear();
  1578             bool inCharClass = false;
  1579             for (;;) {
  1580                 c = getChar();
  1581                 if (c == '\\') {
  1582                     if (!tokenbuf.append(c))
  1583                         goto error;
  1584                     c = getChar();
  1585                 } else if (c == '[') {
  1586                     inCharClass = true;
  1587                 } else if (c == ']') {
  1588                     inCharClass = false;
  1589                 } else if (c == '/' && !inCharClass) {
  1590                     // For compat with IE, allow unescaped / in char classes.
  1591                     break;
  1593                 if (c == '\n' || c == EOF) {
  1594                     ungetChar(c);
  1595                     reportError(JSMSG_UNTERMINATED_REGEXP);
  1596                     goto error;
  1598                 if (!tokenbuf.append(c))
  1599                     goto error;
  1602             RegExpFlag reflags = NoFlags;
  1603             unsigned length = tokenbuf.length() + 1;
                   // Consume flag letters; each of g, i, m and y is accepted at
                   // most once, a repeated flag ends the loop.
  1604             while (true) {
  1605                 c = peekChar();
  1606                 if (c == 'g' && !(reflags & GlobalFlag))
  1607                     reflags = RegExpFlag(reflags | GlobalFlag);
  1608                 else if (c == 'i' && !(reflags & IgnoreCaseFlag))
  1609                     reflags = RegExpFlag(reflags | IgnoreCaseFlag);
  1610                 else if (c == 'm' && !(reflags & MultilineFlag))
  1611                     reflags = RegExpFlag(reflags | MultilineFlag);
  1612                 else if (c == 'y' && !(reflags & StickyFlag))
  1613                     reflags = RegExpFlag(reflags | StickyFlag);
  1614                 else
  1615                     break;
  1616                 getChar();
  1617                 length++;
  1620             c = peekChar();
  1621             if (JS7_ISLET(c)) {
  1622                 char buf[2] = { '\0', '\0' };
  1623                 tp->pos.begin += length + 1;
  1624                 buf[0] = char(c);
  1625                 reportError(JSMSG_BAD_REGEXP_FLAG, buf);
  1626                 (void) getChar();
  1627                 goto error;
  1629             tp->type = TOK_REGEXP;
  1630             tp->setRegExpFlags(reflags);
  1631             goto out;
  1634         tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
  1635         goto out;
  1637       case '%':
  1638         tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
  1639         goto out;
  1641       case '-':
  1642         if (matchChar('-')) {
                   // "-->" is only skipped as a comment when nothing else has
                   // been tokenized on this line (isDirtyLine is still false).
  1643             if (peekChar() == '>' && !flags.isDirtyLine)
  1644                 goto skipline;
  1645             tp->type = TOK_DEC;
  1646         } else {
  1647             tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
  1649         goto out;
  1651       badchar:
  1652       default:
  1653         reportError(JSMSG_ILLEGAL_CHARACTER);
  1654         goto error;
  1657     MOZ_ASSUME_UNREACHABLE("should have jumped to |out| or |error|");
  1659   out:
  1660     flags.isDirtyLine = true;
  1661     tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
  1662     JS_ASSERT(IsTokenSane(tp));
  1663     return tp->type;
  1665   error:
  1666     flags.isDirtyLine = true;
  1667     tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
  1668     tp->type = TOK_ERROR;
  1669     JS_ASSERT(IsTokenSane(tp));
  1670     onError();
  1671     return TOK_ERROR;
  1674 void
  1675 TokenStream::onError()
  1677     flags.hadError = true;
  1678 #ifdef DEBUG
  1679     // Poisoning userbuf on error establishes an invariant: once an erroneous
  1680     // token has been seen, userbuf will not be consulted again.  This is true
  1681     // because the parser will either (a) deal with the TOK_ERROR token by
  1682     // aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
  1683     // match what it expected, it will unget the token, and the next getToken()
  1684     // call will immediately return the just-gotten TOK_ERROR token again
  1685     // without consulting userbuf, thanks to the lookahead buffer.
  1686     userbuf.poison();
  1687 #endif
  1690 JS_FRIEND_API(int)
  1691 js_fgets(char *buf, int size, FILE *file)
  1693     int n, i, c;
  1694     bool crflag;
  1696     n = size - 1;
  1697     if (n < 0)
  1698         return -1;
  1700     crflag = false;
  1701     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
  1702         buf[i] = c;
  1703         if (c == '\n') {        // any \n ends a line
  1704             i++;                // keep the \n; we know there is room for \0
  1705             break;
  1707         if (crflag) {           // \r not followed by \n ends line at the \r
  1708             ungetc(c, file);
  1709             break;              // and overwrite c in buf with \0
  1711         crflag = (c == '\r');
  1714     buf[i] = '\0';
  1715     return i;
  #ifdef DEBUG
  // Debug-only helper: map a TokenKind to the spelling of its enumerator
  // (e.g. TOK_SEMI -> "TOK_SEMI").  TOK_LIMIT and any value not listed below
  // yield "<bad TokenKind>".
  const char *
  TokenKindToString(TokenKind tt)
  {
      // Each listed kind returns its own name, produced by stringifying the
      // enumerator so the label and the text cannot drift apart.
  #define RETURN_TOKEN_KIND_NAME(kind) case kind: return #kind;

      switch (tt) {
        RETURN_TOKEN_KIND_NAME(TOK_ERROR)
        RETURN_TOKEN_KIND_NAME(TOK_EOF)
        RETURN_TOKEN_KIND_NAME(TOK_EOL)
        RETURN_TOKEN_KIND_NAME(TOK_SEMI)
        RETURN_TOKEN_KIND_NAME(TOK_COMMA)
        RETURN_TOKEN_KIND_NAME(TOK_HOOK)
        RETURN_TOKEN_KIND_NAME(TOK_COLON)
        RETURN_TOKEN_KIND_NAME(TOK_OR)
        RETURN_TOKEN_KIND_NAME(TOK_AND)
        RETURN_TOKEN_KIND_NAME(TOK_BITOR)
        RETURN_TOKEN_KIND_NAME(TOK_BITXOR)
        RETURN_TOKEN_KIND_NAME(TOK_BITAND)
        RETURN_TOKEN_KIND_NAME(TOK_ADD)
        RETURN_TOKEN_KIND_NAME(TOK_SUB)
        RETURN_TOKEN_KIND_NAME(TOK_MUL)
        RETURN_TOKEN_KIND_NAME(TOK_DIV)
        RETURN_TOKEN_KIND_NAME(TOK_MOD)
        RETURN_TOKEN_KIND_NAME(TOK_INC)
        RETURN_TOKEN_KIND_NAME(TOK_DEC)
        RETURN_TOKEN_KIND_NAME(TOK_DOT)
        RETURN_TOKEN_KIND_NAME(TOK_TRIPLEDOT)
        RETURN_TOKEN_KIND_NAME(TOK_LB)
        RETURN_TOKEN_KIND_NAME(TOK_RB)
        RETURN_TOKEN_KIND_NAME(TOK_LC)
        RETURN_TOKEN_KIND_NAME(TOK_RC)
        RETURN_TOKEN_KIND_NAME(TOK_LP)
        RETURN_TOKEN_KIND_NAME(TOK_RP)
        RETURN_TOKEN_KIND_NAME(TOK_ARROW)
        RETURN_TOKEN_KIND_NAME(TOK_NAME)
        RETURN_TOKEN_KIND_NAME(TOK_NUMBER)
        RETURN_TOKEN_KIND_NAME(TOK_STRING)
        RETURN_TOKEN_KIND_NAME(TOK_REGEXP)
        RETURN_TOKEN_KIND_NAME(TOK_TRUE)
        RETURN_TOKEN_KIND_NAME(TOK_FALSE)
        RETURN_TOKEN_KIND_NAME(TOK_NULL)
        RETURN_TOKEN_KIND_NAME(TOK_THIS)
        RETURN_TOKEN_KIND_NAME(TOK_FUNCTION)
        RETURN_TOKEN_KIND_NAME(TOK_IF)
        RETURN_TOKEN_KIND_NAME(TOK_ELSE)
        RETURN_TOKEN_KIND_NAME(TOK_SWITCH)
        RETURN_TOKEN_KIND_NAME(TOK_CASE)
        RETURN_TOKEN_KIND_NAME(TOK_DEFAULT)
        RETURN_TOKEN_KIND_NAME(TOK_WHILE)
        RETURN_TOKEN_KIND_NAME(TOK_DO)
        RETURN_TOKEN_KIND_NAME(TOK_FOR)
        RETURN_TOKEN_KIND_NAME(TOK_BREAK)
        RETURN_TOKEN_KIND_NAME(TOK_CONTINUE)
        RETURN_TOKEN_KIND_NAME(TOK_IN)
        RETURN_TOKEN_KIND_NAME(TOK_VAR)
        RETURN_TOKEN_KIND_NAME(TOK_CONST)
        RETURN_TOKEN_KIND_NAME(TOK_WITH)
        RETURN_TOKEN_KIND_NAME(TOK_RETURN)
        RETURN_TOKEN_KIND_NAME(TOK_NEW)
        RETURN_TOKEN_KIND_NAME(TOK_DELETE)
        RETURN_TOKEN_KIND_NAME(TOK_TRY)
        RETURN_TOKEN_KIND_NAME(TOK_CATCH)
        RETURN_TOKEN_KIND_NAME(TOK_FINALLY)
        RETURN_TOKEN_KIND_NAME(TOK_THROW)
        RETURN_TOKEN_KIND_NAME(TOK_INSTANCEOF)
        RETURN_TOKEN_KIND_NAME(TOK_DEBUGGER)
        RETURN_TOKEN_KIND_NAME(TOK_YIELD)
        RETURN_TOKEN_KIND_NAME(TOK_LET)
        RETURN_TOKEN_KIND_NAME(TOK_RESERVED)
        RETURN_TOKEN_KIND_NAME(TOK_STRICT_RESERVED)
        RETURN_TOKEN_KIND_NAME(TOK_STRICTEQ)
        RETURN_TOKEN_KIND_NAME(TOK_EQ)
        RETURN_TOKEN_KIND_NAME(TOK_STRICTNE)
        RETURN_TOKEN_KIND_NAME(TOK_NE)
        RETURN_TOKEN_KIND_NAME(TOK_TYPEOF)
        RETURN_TOKEN_KIND_NAME(TOK_VOID)
        RETURN_TOKEN_KIND_NAME(TOK_NOT)
        RETURN_TOKEN_KIND_NAME(TOK_BITNOT)
        RETURN_TOKEN_KIND_NAME(TOK_LT)
        RETURN_TOKEN_KIND_NAME(TOK_LE)
        RETURN_TOKEN_KIND_NAME(TOK_GT)
        RETURN_TOKEN_KIND_NAME(TOK_GE)
        RETURN_TOKEN_KIND_NAME(TOK_LSH)
        RETURN_TOKEN_KIND_NAME(TOK_RSH)
        RETURN_TOKEN_KIND_NAME(TOK_URSH)
        RETURN_TOKEN_KIND_NAME(TOK_ASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_ADDASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_SUBASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_BITORASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_BITXORASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_BITANDASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_LSHASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_RSHASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_URSHASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_MULASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_DIVASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_MODASSIGN)
        RETURN_TOKEN_KIND_NAME(TOK_EXPORT)
        RETURN_TOKEN_KIND_NAME(TOK_IMPORT)

        // TOK_LIMIT has no name of its own; it falls through to the fallback
        // string below.
        case TOK_LIMIT:
          break;
      }

  #undef RETURN_TOKEN_KIND_NAME

      return "<bad TokenKind>";
  }
  #endif

mercurial