js/src/frontend/TokenStream.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/frontend/TokenStream.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,1821 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99:
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +// JS lexical scanner.
    1.11 +
    1.12 +#include "frontend/TokenStream.h"
    1.13 +
    1.14 +#include "mozilla/PodOperations.h"
    1.15 +
    1.16 +#include <ctype.h>
    1.17 +#include <stdarg.h>
    1.18 +#include <stdio.h>
    1.19 +#include <string.h>
    1.20 +
    1.21 +#include "jsatom.h"
    1.22 +#include "jscntxt.h"
    1.23 +#include "jsexn.h"
    1.24 +#include "jsnum.h"
    1.25 +#include "jsworkers.h"
    1.26 +
    1.27 +#include "frontend/BytecodeCompiler.h"
    1.28 +#include "js/CharacterEncoding.h"
    1.29 +#include "vm/Keywords.h"
    1.30 +#include "vm/StringBuffer.h"
    1.31 +
    1.32 +using namespace js;
    1.33 +using namespace js::frontend;
    1.34 +using namespace js::unicode;
    1.35 +
    1.36 +using mozilla::Maybe;
    1.37 +using mozilla::PodAssign;
    1.38 +using mozilla::PodCopy;
    1.39 +using mozilla::PodZero;
    1.40 +
// One entry of the keyword table: the keyword's spelling together with the
// token kind and JSVersion recorded for it.
struct KeywordInfo {
    const char  *chars;         // C string with keyword text
    TokenKind   tokentype;      // token kind recorded for this keyword
    JSVersion   version;        // JSVersion recorded for this keyword
};
    1.46 +
// The keyword table.  FOR_EACH_JAVASCRIPT_KEYWORD expands KEYWORD_INFO once
// per keyword, producing one KeywordInfo initializer for each of them.
// FindKeyword() returns pointers into this table.
static const KeywordInfo keywords[] = {
#define KEYWORD_INFO(keyword, name, type, version) \
    {js_##keyword##_str, type, version},
    FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
#undef KEYWORD_INFO
};
    1.53 +
    1.54 +// Returns a KeywordInfo for the specified characters, or nullptr if the string
    1.55 +// is not a keyword.
    1.56 +static const KeywordInfo *
    1.57 +FindKeyword(const jschar *s, size_t length)
    1.58 +{
    1.59 +    JS_ASSERT(length != 0);
    1.60 +
    1.61 +    size_t i;
    1.62 +    const KeywordInfo *kw;
    1.63 +    const char *chars;
    1.64 +
    1.65 +#define JSKW_LENGTH()           length
    1.66 +#define JSKW_AT(column)         s[column]
    1.67 +#define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
    1.68 +#define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
    1.69 +#define JSKW_NO_MATCH()         goto no_match;
    1.70 +#include "jsautokw.h"
    1.71 +#undef JSKW_NO_MATCH
    1.72 +#undef JSKW_TEST_GUESS
    1.73 +#undef JSKW_GOT_MATCH
    1.74 +#undef JSKW_AT
    1.75 +#undef JSKW_LENGTH
    1.76 +
    1.77 +  got_match:
    1.78 +    return &keywords[i];
    1.79 +
    1.80 +  test_guess:
    1.81 +    kw = &keywords[i];
    1.82 +    chars = kw->chars;
    1.83 +    do {
    1.84 +        if (*s++ != (unsigned char)(*chars++))
    1.85 +            goto no_match;
    1.86 +    } while (--length != 0);
    1.87 +    return kw;
    1.88 +
    1.89 +  no_match:
    1.90 +    return nullptr;
    1.91 +}
    1.92 +
    1.93 +bool
    1.94 +frontend::IsIdentifier(JSLinearString *str)
    1.95 +{
    1.96 +    const jschar *chars = str->chars();
    1.97 +    size_t length = str->length();
    1.98 +
    1.99 +    if (length == 0)
   1.100 +        return false;
   1.101 +    jschar c = *chars;
   1.102 +    if (!IsIdentifierStart(c))
   1.103 +        return false;
   1.104 +    const jschar *end = chars + length;
   1.105 +    while (++chars != end) {
   1.106 +        c = *chars;
   1.107 +        if (!IsIdentifierPart(c))
   1.108 +            return false;
   1.109 +    }
   1.110 +    return true;
   1.111 +}
   1.112 +
   1.113 +bool
   1.114 +frontend::IsKeyword(JSLinearString *str)
   1.115 +{
   1.116 +    return FindKeyword(str->chars(), str->length()) != nullptr;
   1.117 +}
   1.118 +
// Set up the line-start table with its two initial entries: offset 0 for the
// first line, followed by the MAX_PTR sentinel.  |ln| is stored as the number
// of the initial line.
TokenStream::SourceCoords::SourceCoords(ExclusiveContext *cx, uint32_t ln)
  : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
{
    // This is actually necessary!  Removing it causes compile errors on
    // GCC and clang.  You could try declaring this:
    //
    //   const uint32_t TokenStream::SourceCoords::MAX_PTR;
    //
    // which fixes the GCC/clang error, but causes bustage on Windows.  Sigh.
    //
    uint32_t maxPtr = MAX_PTR;

    // The first line begins at buffer offset 0.  MAX_PTR is the sentinel.  The
    // appends cannot fail because |lineStartOffsets_| has statically-allocated
    // elements.
    JS_ASSERT(lineStartOffsets_.capacity() >= 2);
    (void)lineStartOffsets_.reserve(2);
    lineStartOffsets_.infallibleAppend(0);
    lineStartOffsets_.infallibleAppend(maxPtr);
}
   1.139 +
   1.140 +MOZ_ALWAYS_INLINE void
   1.141 +TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
   1.142 +{
   1.143 +    uint32_t lineIndex = lineNumToIndex(lineNum);
   1.144 +    uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
   1.145 +
   1.146 +    JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
   1.147 +
   1.148 +    if (lineIndex == sentinelIndex) {
   1.149 +        // We haven't seen this newline before.  Update lineStartOffsets_.
   1.150 +        // We ignore any failures due to OOM -- because we always have a
   1.151 +        // sentinel node, it'll just be like the newline wasn't present.  I.e.
   1.152 +        // the line numbers will be wrong, but the code won't crash or anything
   1.153 +        // like that.
   1.154 +        lineStartOffsets_[lineIndex] = lineStartOffset;
   1.155 +
   1.156 +        uint32_t maxPtr = MAX_PTR;
   1.157 +        (void)lineStartOffsets_.append(maxPtr);
   1.158 +
   1.159 +    } else {
   1.160 +        // We have seen this newline before (and ungot it).  Do nothing (other
   1.161 +        // than checking it hasn't mysteriously changed).
   1.162 +        JS_ASSERT(lineStartOffsets_[lineIndex] == lineStartOffset);
   1.163 +    }
   1.164 +}
   1.165 +
   1.166 +MOZ_ALWAYS_INLINE bool
   1.167 +TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other)
   1.168 +{
   1.169 +    JS_ASSERT(lineStartOffsets_.back() == MAX_PTR);
   1.170 +    JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
   1.171 +
   1.172 +    if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
   1.173 +        return true;
   1.174 +
   1.175 +    uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
   1.176 +    lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
   1.177 +
   1.178 +    for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
   1.179 +        if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
   1.180 +            return false;
   1.181 +    }
   1.182 +    return true;
   1.183 +}
   1.184 +
// Return the index into |lineStartOffsets_| of the line containing |offset|,
// i.e. the index i for which
// lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1].
//
// The answer is remembered in |lastLineIndex_| and used as the starting point
// for the next query, so runs of queries on the same or nearby following lines
// avoid the binary search.  (This method is const yet updates
// |lastLineIndex_|; presumably that member is declared mutable -- its
// declaration is not in this file.)
MOZ_ALWAYS_INLINE uint32_t
TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
{
    uint32_t iMin, iMax, iMid;

    if (lineStartOffsets_[lastLineIndex_] <= offset) {
        // If we reach here, offset is on a line the same as or higher than
        // last time.  Check first for the +0, +1, +2 cases, because they
        // typically cover 85--98% of cases.
        if (offset < lineStartOffsets_[lastLineIndex_ + 1])
            return lastLineIndex_;      // lineIndex is same as last time

        // If we reach here, there must be at least one more entry (plus the
        // sentinel).  Try it.
        lastLineIndex_++;
        if (offset < lineStartOffsets_[lastLineIndex_ + 1])
            return lastLineIndex_;      // lineIndex is one higher than last time

        // The same logic applies here.
        lastLineIndex_++;
        if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
            return lastLineIndex_;      // lineIndex is two higher than last time
        }

        // No luck.  Oh well, we have a better-than-default starting point for
        // the binary search.
        iMin = lastLineIndex_ + 1;
        JS_ASSERT(iMin < lineStartOffsets_.length() - 1);   // -1 due to the sentinel

    } else {
        // offset is on an earlier line than last time: search from the start.
        iMin = 0;
    }

    // This is a binary search with deferred detection of equality, which was
    // marginally faster in this case than a standard binary search.
    // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
    // want one before that.
    iMax = lineStartOffsets_.length() - 2;
    while (iMax > iMin) {
        iMid = iMin + (iMax - iMin) / 2;
        if (offset >= lineStartOffsets_[iMid + 1])
            iMin = iMid + 1;    // offset is above lineStartOffsets_[iMid]
        else
            iMax = iMid;        // offset is below or within lineStartOffsets_[iMid]
    }
    JS_ASSERT(iMax == iMin);
    JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
    // Remember the answer as the starting point for the next lookup.
    lastLineIndex_ = iMin;
    return iMin;
}
   1.235 +
   1.236 +uint32_t
   1.237 +TokenStream::SourceCoords::lineNum(uint32_t offset) const
   1.238 +{
   1.239 +    uint32_t lineIndex = lineIndexOf(offset);
   1.240 +    return lineIndexToNum(lineIndex);
   1.241 +}
   1.242 +
   1.243 +uint32_t
   1.244 +TokenStream::SourceCoords::columnIndex(uint32_t offset) const
   1.245 +{
   1.246 +    uint32_t lineIndex = lineIndexOf(offset);
   1.247 +    uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
   1.248 +    JS_ASSERT(offset >= lineStartOffset);
   1.249 +    return offset - lineStartOffset;
   1.250 +}
   1.251 +
   1.252 +void
   1.253 +TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum,
   1.254 +                                                 uint32_t *columnIndex) const
   1.255 +{
   1.256 +    uint32_t lineIndex = lineIndexOf(offset);
   1.257 +    *lineNum = lineIndexToNum(lineIndex);
   1.258 +    uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
   1.259 +    JS_ASSERT(offset >= lineStartOffset);
   1.260 +    *columnIndex = offset - lineStartOffset;
   1.261 +}
   1.262 +
   1.263 +#ifdef _MSC_VER
   1.264 +#pragma warning(push)
   1.265 +#pragma warning(disable:4351)
   1.266 +#endif
   1.267 +
   1.268 +// Initialize members that aren't initialized in |init|.
   1.269 +TokenStream::TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
   1.270 +                         const jschar *base, size_t length, StrictModeGetter *smg)
   1.271 +  : srcCoords(cx, options.lineno),
   1.272 +    options_(options),
   1.273 +    tokens(),
   1.274 +    cursor(),
   1.275 +    lookahead(),
   1.276 +    lineno(options.lineno),
   1.277 +    flags(),
   1.278 +    linebase(base - options.column),
   1.279 +    prevLinebase(nullptr),
   1.280 +    userbuf(cx, base - options.column, length + options.column), // See comment below
   1.281 +    filename(options.filename()),
   1.282 +    displayURL_(nullptr),
   1.283 +    sourceMapURL_(nullptr),
   1.284 +    tokenbuf(cx),
   1.285 +    cx(cx),
   1.286 +    originPrincipals(options.originPrincipals(cx)),
   1.287 +    strictModeGetter(smg)
   1.288 +{
   1.289 +    // The caller must ensure that a reference is held on the supplied principals
   1.290 +    // throughout compilation.
   1.291 +    JS_ASSERT_IF(originPrincipals, originPrincipals->refcount > 0);
   1.292 +
   1.293 +    // Column numbers are computed as offsets from the current line's base, so the
   1.294 +    // initial line's base must be included in the buffer. linebase and userbuf
   1.295 +    // were adjusted above, and if we are starting tokenization part way through
   1.296 +    // this line then adjust the next character.
   1.297 +    userbuf.setAddressOfNextRawChar(base);
   1.298 +
   1.299 +    // Nb: the following tables could be static, but initializing them here is
   1.300 +    // much easier.  Don't worry, the time to initialize them for each
   1.301 +    // TokenStream is trivial.  See bug 639420.
   1.302 +
   1.303 +    // See getChar() for an explanation of maybeEOL[].
   1.304 +    memset(maybeEOL, 0, sizeof(maybeEOL));
   1.305 +    maybeEOL[unsigned('\n')] = true;
   1.306 +    maybeEOL[unsigned('\r')] = true;
   1.307 +    maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
   1.308 +    maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
   1.309 +
   1.310 +    // See getTokenInternal() for an explanation of maybeStrSpecial[].
   1.311 +    memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
   1.312 +    maybeStrSpecial[unsigned('"')] = true;
   1.313 +    maybeStrSpecial[unsigned('\'')] = true;
   1.314 +    maybeStrSpecial[unsigned('\\')] = true;
   1.315 +    maybeStrSpecial[unsigned('\n')] = true;
   1.316 +    maybeStrSpecial[unsigned('\r')] = true;
   1.317 +    maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
   1.318 +    maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
   1.319 +    maybeStrSpecial[unsigned(EOF & 0xff)] = true;
   1.320 +
   1.321 +    // See Parser::assignExpr() for an explanation of isExprEnding[].
   1.322 +    memset(isExprEnding, 0, sizeof(isExprEnding));
   1.323 +    isExprEnding[TOK_COMMA] = 1;
   1.324 +    isExprEnding[TOK_SEMI]  = 1;
   1.325 +    isExprEnding[TOK_COLON] = 1;
   1.326 +    isExprEnding[TOK_RP]    = 1;
   1.327 +    isExprEnding[TOK_RB]    = 1;
   1.328 +    isExprEnding[TOK_RC]    = 1;
   1.329 +}
   1.330 +
   1.331 +#ifdef _MSC_VER
   1.332 +#pragma warning(pop)
   1.333 +#endif
   1.334 +
   1.335 +TokenStream::~TokenStream()
   1.336 +{
   1.337 +    js_free(displayURL_);
   1.338 +    js_free(sourceMapURL_);
   1.339 +
   1.340 +    JS_ASSERT_IF(originPrincipals, originPrincipals->refcount);
   1.341 +}
   1.342 +
// Use the fastest available getc: fast_getc expands to getc_unlocked when
// HAVE_GETC_UNLOCKED is defined, to _getc_nolock when HAVE__GETC_NOLOCK is
// defined, and to plain getc otherwise.
#if defined(HAVE_GETC_UNLOCKED)
# define fast_getc getc_unlocked
#elif defined(HAVE__GETC_NOLOCK)
# define fast_getc _getc_nolock
#else
# define fast_getc getc
#endif
   1.351 +
   1.352 +MOZ_ALWAYS_INLINE void
   1.353 +TokenStream::updateLineInfoForEOL()
   1.354 +{
   1.355 +    prevLinebase = linebase;
   1.356 +    linebase = userbuf.addressOfNextRawChar();
   1.357 +    lineno++;
   1.358 +    srcCoords.add(lineno, linebase - userbuf.base());
   1.359 +}
   1.360 +
// Reset the per-line flag state at an end-of-line by clearing
// |flags.isDirtyLine|.
MOZ_ALWAYS_INLINE void
TokenStream::updateFlagsForEOL()
{
    flags.isDirtyLine = false;
}
   1.366 +
   1.367 +// This gets the next char, normalizing all EOL sequences to '\n' as it goes.
   1.368 +int32_t
   1.369 +TokenStream::getChar()
   1.370 +{
   1.371 +    int32_t c;
   1.372 +    if (MOZ_LIKELY(userbuf.hasRawChars())) {
   1.373 +        c = userbuf.getRawChar();
   1.374 +
   1.375 +        // Normalize the jschar if it was a newline.  We need to detect any of
   1.376 +        // these four characters:  '\n' (0x000a), '\r' (0x000d),
   1.377 +        // LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029).  Testing for each
   1.378 +        // one in turn is slow, so we use a single probabilistic check, and if
   1.379 +        // that succeeds, test for them individually.
   1.380 +        //
   1.381 +        // We use the bottom 8 bits to index into a lookup table, succeeding
   1.382 +        // when d&0xff is 0xa, 0xd, 0x28 or 0x29.  Among ASCII chars (which
   1.383 +        // are by the far the most common) this gives false positives for '('
   1.384 +        // (0x0028) and ')' (0x0029).  We could avoid those by incorporating
   1.385 +        // the 13th bit of d into the lookup, but that requires extra shifting
   1.386 +        // and masking and isn't worthwhile.  See TokenStream::TokenStream()
   1.387 +        // for the initialization of the relevant entries in the table.
   1.388 +        if (MOZ_UNLIKELY(maybeEOL[c & 0xff])) {
   1.389 +            if (c == '\n')
   1.390 +                goto eol;
   1.391 +            if (c == '\r') {
   1.392 +                // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
   1.393 +                if (userbuf.hasRawChars())
   1.394 +                    userbuf.matchRawChar('\n');
   1.395 +                goto eol;
   1.396 +            }
   1.397 +            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
   1.398 +                goto eol;
   1.399 +        }
   1.400 +        return c;
   1.401 +    }
   1.402 +
   1.403 +    flags.isEOF = true;
   1.404 +    return EOF;
   1.405 +
   1.406 +  eol:
   1.407 +    updateLineInfoForEOL();
   1.408 +    return '\n';
   1.409 +}
   1.410 +
   1.411 +// This gets the next char. It does nothing special with EOL sequences, not
   1.412 +// even updating the line counters.  It can be used safely if (a) the
   1.413 +// resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
   1.414 +// it's an EOL, and (b) the line-related state (lineno, linebase) is not used
   1.415 +// before it's ungotten.
   1.416 +int32_t
   1.417 +TokenStream::getCharIgnoreEOL()
   1.418 +{
   1.419 +    if (MOZ_LIKELY(userbuf.hasRawChars()))
   1.420 +        return userbuf.getRawChar();
   1.421 +
   1.422 +    flags.isEOF = true;
   1.423 +    return EOF;
   1.424 +}
   1.425 +
   1.426 +void
   1.427 +TokenStream::ungetChar(int32_t c)
   1.428 +{
   1.429 +    if (c == EOF)
   1.430 +        return;
   1.431 +    JS_ASSERT(!userbuf.atStart());
   1.432 +    userbuf.ungetRawChar();
   1.433 +    if (c == '\n') {
   1.434 +#ifdef DEBUG
   1.435 +        int32_t c2 = userbuf.peekRawChar();
   1.436 +        JS_ASSERT(TokenBuf::isRawEOLChar(c2));
   1.437 +#endif
   1.438 +
   1.439 +        // If it's a \r\n sequence, also unget the \r.
   1.440 +        if (!userbuf.atStart())
   1.441 +            userbuf.matchRawCharBackwards('\r');
   1.442 +
   1.443 +        JS_ASSERT(prevLinebase);    // we should never get more than one EOL char
   1.444 +        linebase = prevLinebase;
   1.445 +        prevLinebase = nullptr;
   1.446 +        lineno--;
   1.447 +    } else {
   1.448 +        JS_ASSERT(userbuf.peekRawChar() == c);
   1.449 +    }
   1.450 +}
   1.451 +
   1.452 +void
   1.453 +TokenStream::ungetCharIgnoreEOL(int32_t c)
   1.454 +{
   1.455 +    if (c == EOF)
   1.456 +        return;
   1.457 +    JS_ASSERT(!userbuf.atStart());
   1.458 +    userbuf.ungetRawChar();
   1.459 +}
   1.460 +
   1.461 +// Return true iff |n| raw characters can be read from this without reading past
   1.462 +// EOF or a newline, and copy those characters into |cp| if so.  The characters
   1.463 +// are not consumed: use skipChars(n) to do so after checking that the consumed
   1.464 +// characters had appropriate values.
   1.465 +bool
   1.466 +TokenStream::peekChars(int n, jschar *cp)
   1.467 +{
   1.468 +    int i, j;
   1.469 +    int32_t c;
   1.470 +
   1.471 +    for (i = 0; i < n; i++) {
   1.472 +        c = getCharIgnoreEOL();
   1.473 +        if (c == EOF)
   1.474 +            break;
   1.475 +        if (c == '\n') {
   1.476 +            ungetCharIgnoreEOL(c);
   1.477 +            break;
   1.478 +        }
   1.479 +        cp[i] = jschar(c);
   1.480 +    }
   1.481 +    for (j = i - 1; j >= 0; j--)
   1.482 +        ungetCharIgnoreEOL(cp[j]);
   1.483 +    return i == n;
   1.484 +}
   1.485 +
   1.486 +const jschar *
   1.487 +TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max)
   1.488 +{
   1.489 +    JS_ASSERT(base_ <= p && p <= limit_);
   1.490 +
   1.491 +    size_t n = 0;
   1.492 +    while (true) {
   1.493 +        if (p >= limit_)
   1.494 +            break;
   1.495 +        if (n >= max)
   1.496 +            break;
   1.497 +        if (TokenBuf::isRawEOLChar(*p++))
   1.498 +            break;
   1.499 +        n++;
   1.500 +    }
   1.501 +    return p;
   1.502 +}
   1.503 +
   1.504 +void
   1.505 +TokenStream::advance(size_t position)
   1.506 +{
   1.507 +    const jschar *end = userbuf.base() + position;
   1.508 +    while (userbuf.addressOfNextRawChar() < end)
   1.509 +        getChar();
   1.510 +
   1.511 +    Token *cur = &tokens[cursor];
   1.512 +    cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base();
   1.513 +    cur->type = TOK_ERROR;
   1.514 +    lookahead = 0;
   1.515 +}
   1.516 +
   1.517 +void
   1.518 +TokenStream::tell(Position *pos)
   1.519 +{
   1.520 +    pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
   1.521 +    pos->flags = flags;
   1.522 +    pos->lineno = lineno;
   1.523 +    pos->linebase = linebase;
   1.524 +    pos->prevLinebase = prevLinebase;
   1.525 +    pos->lookahead = lookahead;
   1.526 +    pos->currentToken = currentToken();
   1.527 +    for (unsigned i = 0; i < lookahead; i++)
   1.528 +        pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];
   1.529 +}
   1.530 +
   1.531 +void
   1.532 +TokenStream::seek(const Position &pos)
   1.533 +{
   1.534 +    userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
   1.535 +    flags = pos.flags;
   1.536 +    lineno = pos.lineno;
   1.537 +    linebase = pos.linebase;
   1.538 +    prevLinebase = pos.prevLinebase;
   1.539 +    lookahead = pos.lookahead;
   1.540 +
   1.541 +    tokens[cursor] = pos.currentToken;
   1.542 +    for (unsigned i = 0; i < lookahead; i++)
   1.543 +        tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];
   1.544 +}
   1.545 +
   1.546 +bool
   1.547 +TokenStream::seek(const Position &pos, const TokenStream &other)
   1.548 +{
   1.549 +    if (!srcCoords.fill(other.srcCoords))
   1.550 +        return false;
   1.551 +    seek(pos);
   1.552 +    return true;
   1.553 +}
   1.554 +
   1.555 +bool
   1.556 +TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
   1.557 +                                           va_list args)
   1.558 +{
   1.559 +    // In strict mode code, this is an error, not merely a warning.
   1.560 +    unsigned flags = JSREPORT_STRICT;
   1.561 +    if (strictMode)
   1.562 +        flags |= JSREPORT_ERROR;
   1.563 +    else if (options().extraWarningsOption)
   1.564 +        flags |= JSREPORT_WARNING;
   1.565 +    else
   1.566 +        return true;
   1.567 +
   1.568 +    return reportCompileErrorNumberVA(offset, flags, errorNumber, args);
   1.569 +}
   1.570 +
// Turn this compile error into a pending exception on |cx| via
// js_ErrorToException; when that call returns false, hand the message and
// report to the error reporter instead.
void
CompileError::throwError(JSContext *cx)
{
    // If there's a runtime exception type associated with this error
    // number, set that as the pending exception.  For errors occurring at
    // compile time, this is very likely to be a JSEXN_SYNTAXERR.
    //
    // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
    // flag will be set in report.flags.  Proper behavior for an error
    // reporter is to ignore a report with this flag for all but top-level
    // compilation errors.  The exception will remain pending, and so long
    // as the non-top-level "load", "eval", or "compile" native function
    // returns false, the top-level reporter will eventually receive the
    // uncaught exception report.
    if (!js_ErrorToException(cx, message, &report, nullptr, nullptr))
        CallErrorReporter(cx, message, &report);
}
   1.588 +
   1.589 +CompileError::~CompileError()
   1.590 +{
   1.591 +    js_free((void*)report.uclinebuf);
   1.592 +    js_free((void*)report.linebuf);
   1.593 +    js_free((void*)report.ucmessage);
   1.594 +    js_free(message);
   1.595 +    message = nullptr;
   1.596 +
   1.597 +    if (report.messageArgs) {
   1.598 +        if (argumentsType == ArgumentsAreASCII) {
   1.599 +            unsigned i = 0;
   1.600 +            while (report.messageArgs[i])
   1.601 +                js_free((void*)report.messageArgs[i++]);
   1.602 +        }
   1.603 +        js_free(report.messageArgs);
   1.604 +    }
   1.605 +
   1.606 +    PodZero(&report);
   1.607 +}
   1.608 +
// Build a compile error report for |errorNumber| at buffer offset |offset|
// (or with no position when |offset| is NoOffset) and either throw it right
// away (when |cx| is a JSContext) or leave it in the context's pending compile
// errors.
//
// |flags| are JSREPORT_* flags; a warning is promoted to an error when the
// werror option is set.  |args| supplies the arguments for the error message.
//
// Returns true only when the report was issued as a warning.  Returns false
// for errors, and also when building the report fails part-way.
bool
TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
                                        va_list args)
{
    bool warning = JSREPORT_IS_WARNING(flags);

    // With werror, warnings are reported (and returned) as errors.
    if (warning && options().werrorOption) {
        flags &= ~JSREPORT_WARNING;
        warning = false;
    }

    // On the main thread, report the error immediately. When compiling off
    // thread, save the error so that the main thread can report it later.
    CompileError tempErr;
    CompileError &err = cx->isJSContext() ? tempErr : cx->addPendingCompileError();

    err.report.flags = flags;
    err.report.errorNumber = errorNumber;
    err.report.filename = filename;
    err.report.originPrincipals = originPrincipals;
    if (offset == NoOffset) {
        err.report.lineno = 0;
        err.report.column = 0;
    } else {
        err.report.lineno = srcCoords.lineNum(offset);
        err.report.column = srcCoords.columnIndex(offset);
    }

    err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;

    if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, nullptr, errorNumber, &err.message,
                                 &err.report, err.argumentsType, args))
    {
        return false;
    }

    // Given a token, T, that we want to complain about: if T's (starting)
    // lineno doesn't match TokenStream's lineno, that means we've scanned past
    // the line that T starts on, which makes it hard to print some or all of
    // T's (starting) line for context.
    //
    // So we don't even try, leaving report.linebuf and friends zeroed.  This
    // means that any error involving a multi-line token (e.g. an unterminated
    // multi-line string literal) won't have a context printed.
    if (offset != NoOffset && err.report.lineno == lineno) {
        const jschar *tokenStart = userbuf.base() + offset;

        // We show only a portion (a "window") of the line around the erroneous
        // token -- the first char in the token, plus |windowRadius| chars
        // before it and |windowRadius - 1| chars after it.  This is because
        // lines can be very long and printing the whole line is (a) not that
        // helpful, and (b) can waste a lot of memory.  See bug 634444.
        static const size_t windowRadius = 60;

        // Truncate at the front if necessary.
        const jschar *windowBase = (linebase + windowRadius < tokenStart)
                                 ? tokenStart - windowRadius
                                 : linebase;
        // Position of the token within the window.
        uint32_t windowOffset = tokenStart - windowBase;

        // Find EOL, or truncate at the back if necessary.
        const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius);
        size_t windowLength = windowLimit - windowBase;
        JS_ASSERT(windowLength <= windowRadius * 2);

        // Create the windowed strings.
        StringBuffer windowBuf(cx);
        if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0))
            return false;

        // Unicode and char versions of the window into the offending source
        // line, without final \n.
        err.report.uclinebuf = windowBuf.extractWellSized();
        if (!err.report.uclinebuf)
            return false;
        TwoByteChars tbchars(err.report.uclinebuf, windowLength);
        err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str();
        if (!err.report.linebuf)
            return false;

        // Point at the offending token inside each copy of the window.
        err.report.tokenptr = err.report.linebuf + windowOffset;
        err.report.uctokenptr = err.report.uclinebuf + windowOffset;
    }

    if (cx->isJSContext())
        err.throwError(cx->asJSContext());

    return warning;
}
   1.698 +
   1.699 +bool
   1.700 +TokenStream::reportStrictModeError(unsigned errorNumber, ...)
   1.701 +{
   1.702 +    va_list args;
   1.703 +    va_start(args, errorNumber);
   1.704 +    bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
   1.705 +                                                errorNumber, args);
   1.706 +    va_end(args);
   1.707 +    return result;
   1.708 +}
   1.709 +
   1.710 +bool
   1.711 +TokenStream::reportError(unsigned errorNumber, ...)
   1.712 +{
   1.713 +    va_list args;
   1.714 +    va_start(args, errorNumber);
   1.715 +    bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
   1.716 +                                             args);
   1.717 +    va_end(args);
   1.718 +    return result;
   1.719 +}
   1.720 +
   1.721 +bool
   1.722 +TokenStream::reportWarning(unsigned errorNumber, ...)
   1.723 +{
   1.724 +    va_list args;
   1.725 +    va_start(args, errorNumber);
   1.726 +    bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
   1.727 +                                             errorNumber, args);
   1.728 +    va_end(args);
   1.729 +    return result;
   1.730 +}
   1.731 +
   1.732 +bool
   1.733 +TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
   1.734 +{
   1.735 +    if (!options().extraWarningsOption)
   1.736 +        return true;
   1.737 +
   1.738 +    return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
   1.739 +}
   1.740 +
// Report |errorNumber| at |offset| through reportCompileErrorNumberVA() with
// the JSREPORT_WARNING flag.  The result of that call is discarded.
void
TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
{
    va_list args;
    va_start(args, errorNumber);
    reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, args);
    va_end(args);
}
   1.749 +
   1.750 +// We have encountered a '\': check for a Unicode escape sequence after it,
   1.751 +// that is, a 'u' followed by four hexadecimal digits.  Return 'true' and store
   1.752 +// the decoded value in *result if we found one.  Otherwise, return 'false' and
   1.753 +// leave *result untouched.  In both cases, do not advance along the buffer.
   1.754 +bool
   1.755 +TokenStream::peekUnicodeEscape(int *result)
   1.756 +{
   1.757 +    jschar cp[5];  // holds the 'u' and the four hex digits
   1.758 +
   1.759 +    if (peekChars(5, cp) && cp[0] == 'u' &&
   1.760 +        JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
   1.761 +        JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
   1.762 +    {
   // Fold the digits together four bits at a time, most significant digit first.
   1.763 +        *result = (((((JS7_UNHEX(cp[1]) << 4)
   1.764 +                + JS7_UNHEX(cp[2])) << 4)
   1.765 +              + JS7_UNHEX(cp[3])) << 4)
   1.766 +            + JS7_UNHEX(cp[4]);
   1.767 +        return true;
   1.768 +    }
   1.769 +    return false;
   1.770 +}
   1.771 +
   // If the upcoming characters form a Unicode escape (as recognized by
   // peekUnicodeEscape()) whose value satisfies IsIdentifierStart(), skip the five
   // characters of the escape and return true with the value stored in *cp.
   // Otherwise return false without skipping anything.  Note that *cp is still
   // overwritten when an escape is present but its value is not an identifier start.
   1.772 +bool
   1.773 +TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
   1.774 +{
   1.775 +    if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
   1.776 +        skipChars(5);
   1.777 +        return true;
   1.778 +    }
   1.779 +    return false;
   1.780 +}
   1.781 +
   // If the upcoming characters form a Unicode escape (as recognized by
   // peekUnicodeEscape()) whose value satisfies IsIdentifierPart(), skip the five
   // characters of the escape and return true with the value stored in *cp.
   // Otherwise return false without skipping anything.  Note that *cp is still
   // overwritten when an escape is present but its value is not an identifier part.
   1.782 +bool
   1.783 +TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
   1.784 +{
   1.785 +    if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
   1.786 +        skipChars(5);
   1.787 +        return true;
   1.788 +    }
   1.789 +    return false;
   1.790 +}
   1.791 +
   1.792 +// Helper function which returns true if the first length(q) characters in p are
   1.793 +// the same as the characters in q, where q is a NUL-terminated C string.
   // p is never bounds-checked here, so the caller must make sure that at least
   // length(q) characters are readable through it.  An empty q always matches.
   1.794 +static bool
   1.795 +CharsMatch(const jschar *p, const char *q) {
   1.796 +    while (*q) {
   1.797 +        if (*p++ != *q++)
   1.798 +            return false;
   1.799 +    }
   1.800 +    return true;
   1.801 +}
   1.802 +
   // Look for debugging directives at the current position, trying sourceURL first
   // and sourceMappingURL second.  |isMultiline| and |shouldWarnDeprecated| are passed
   // through to both lookups.  Returns false as soon as either lookup returns false,
   // and true otherwise.
   1.803 +bool
   1.804 +TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
   1.805 +{
   1.806 +    // Match directive comments used in debugging, such as "//# sourceURL" and
   1.807 +    // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
   1.808 +    //
   1.809 +    // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
   1.810 +    // line comments containing a source mapping URL inside a multiline
   1.811 +    // comment. To avoid potentially expensive lookahead and backtracking, we
   1.812 +    // only check for this case if we encounter a '#' character.
   1.813 +
   1.814 +    if (!getDisplayURL(isMultiline, shouldWarnDeprecated))
   1.815 +        return false;
   1.816 +    if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))
   1.817 +        return false;
   1.818 +
   1.819 +    return true;
   1.820 +}
   1.821 +
   // Try to match |directive| at the current position and, on a match, capture the
   // URL that follows it.
   //
   //   isMultiline          - true when scanning inside a multi-line comment, in which
   //                          case a "*/" terminator also ends the URL.
   //   shouldWarnDeprecated - when true, a JSMSG_DEPRECATED_PRAGMA warning naming
   //                          |errorMsgPragma| is reported before the URL is read.
   //   directive            - NUL-terminated text to match, e.g. " sourceURL=".
   //   directiveLength      - number of characters in |directive|; at most 18, the
   //                          size of the local peek buffer.
   //   destination          - receives a freshly allocated, NUL-terminated copy of the
   //                          URL; the buffer it previously pointed to is freed.
   //
   // Returns false if reporting the warning fails, if growing tokenbuf fails, or if
   // allocating the copy fails.  Returns true in every other case, including when the
   // directive does not match or when it is not followed by any URL characters (in
   // both of which *destination is left alone).
   1.822 +bool
   1.823 +TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
   1.824 +                          const char *directive, int directiveLength,
   1.825 +                          const char *errorMsgPragma, jschar **destination) {
   1.826 +    JS_ASSERT(directiveLength <= 18);
   1.827 +    jschar peeked[18];
   1.828 +    int32_t c;
   1.829 +
   1.830 +    if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
   1.831 +        if (shouldWarnDeprecated &&
   1.832 +            !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
   1.833 +            return false;
   1.834 +
   1.835 +        skipChars(directiveLength);
   1.836 +        tokenbuf.clear();
   1.837 +
   1.838 +        while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
   1.839 +            getChar();
   1.840 +            // Debugging directives can occur in both single- and multi-line
   1.841 +            // comments. If we're currently inside a multi-line comment, we also
   1.842 +            // need to recognize multi-line comment terminators.
   1.843 +            if (isMultiline && c == '*' && peekChar() == '/') {
   1.844 +                ungetChar('*');
   1.845 +                break;
   1.846 +            }
   //                 A failed append must not be ignored: it would leave a truncated
   //                 URL behind while still reporting success to the caller.
   1.847 +            if (!tokenbuf.append(c))
                          return false;
   1.848 +        }
   1.849 +
   1.850 +        if (tokenbuf.empty())
   1.851 +            // The directive's URL was missing, but this is not quite an
   1.852 +            // exception that we should stop and drop everything for.
   1.853 +            return true;
   1.854 +
   1.855 +        size_t length = tokenbuf.length();
   1.856 +
   1.857 +        js_free(*destination);  // release the buffer stored by an earlier directive
   1.858 +        *destination = cx->pod_malloc<jschar>(length + 1);  // +1 for the terminator
   1.859 +        if (!*destination)
   1.860 +            return false;
   1.861 +
   1.862 +        PodCopy(*destination, tokenbuf.begin(), length);
   1.863 +        (*destination)[length] = '\0';
   1.864 +    }
   1.865 +
   1.866 +    return true;
   1.867 +}
   1.868 +
   // Look for a sourceURL directive at the current position and, when one is found,
   // store its URL in displayURL_.  This is a thin wrapper around getDirective();
   // 11 is the length of the directive text " sourceURL=", leading space included.
   // Returns the result of getDirective().
   1.869 +bool
   1.870 +TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
   1.871 +{
   1.872 +    // Match comments of the form "//# sourceURL=<url>" or
   1.873 +    // "/\* //# sourceURL=<url> *\/"
   1.874 +    //
   1.875 +    // Note that while these are labeled "sourceURL" in the source text,
   1.876 +    // internally we refer to it as a "displayURL" to distinguish what the
   1.877 +    // developer would like to refer to the source as from the source's actual
   1.878 +    // URL.
   1.879 +
   1.880 +    return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,
   1.881 +                        "sourceURL", &displayURL_);
   1.882 +}
   1.883 +
   // Look for a sourceMappingURL directive at the current position and, when one is
   // found, store its URL in sourceMapURL_.  This is a thin wrapper around
   // getDirective(); 18 is the length of the directive text " sourceMappingURL=",
   // leading space included.  Returns the result of getDirective().
   1.884 +bool
   1.885 +TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
   1.886 +{
   1.887 +    // Match comments of the form "//# sourceMappingURL=<url>" or
   1.888 +    // "/\* //# sourceMappingURL=<url> *\/"
   1.889 +
   1.890 +    return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,
   1.891 +                        "sourceMappingURL", &sourceMapURL_);
   1.892 +}
   1.893 +
   // Advance the cursor of the token ring buffer (wrapping through ntokensMask) and
   // return a pointer to the slot it now designates.  pos.begin of that slot is set
   // to the offset, measured from the base of userbuf, of the next raw char shifted
   // by |adjust|.  No other field of the token is initialized here.
   1.894 +MOZ_ALWAYS_INLINE Token *
   1.895 +TokenStream::newToken(ptrdiff_t adjust)
   1.896 +{
   1.897 +    cursor = (cursor + 1) & ntokensMask;
   1.898 +    Token *tp = &tokens[cursor];
   1.899 +    tp->pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base();
   1.900 +
   1.901 +    // NOTE: tp->pos.end is not set until the very end of getTokenInternal().
   1.902 +    MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));
   1.903 +
   1.904 +    return tp;
   1.905 +}
   1.906 +
   // Atomize the characters currently held in |cb| by handing its begin pointer and
   // length to AtomizeChars(), and return whatever that call returns.
   1.907 +MOZ_ALWAYS_INLINE JSAtom *
   1.908 +TokenStream::atomize(ExclusiveContext *cx, CharBuffer &cb)
   1.909 +{
   1.910 +    return AtomizeChars(cx, cb.begin(), cb.length());
   1.911 +}
   1.912 +
   // Debug-only consistency check for a finished token: its type must lie in the
   // range [TOK_ERROR, TOK_LIMIT) without being TOK_EOL, and its end position must
   // not precede its begin position.  Returns true when both conditions hold.
   1.913 +#ifdef DEBUG
   1.914 +static bool
   1.915 +IsTokenSane(Token *tp)
   1.916 +{
   1.917 +    // Nb: TOK_EOL should never be used in an actual Token;  it should only be
   1.918 +    // returned as a TokenKind from peekTokenSameLine().
   1.919 +    if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL)
   1.920 +        return false;
   1.921 +
   1.922 +    if (tp->pos.end < tp->pos.begin)
   1.923 +        return false;
   1.924 +
   1.925 +    return true;
   1.926 +}
   1.927 +#endif
   1.928 +
   // Rebuild in tokenbuf the identifier that begins at |identStart|, replacing each
   // Unicode escape accepted by matchUnicodeEscapeIdent() with the value it denotes.
   // The read position of userbuf is saved on entry and restored on every exit path,
   // so the caller observes no change in scanning position.  Returns false only when
   // appending to tokenbuf fails, and true otherwise.
   1.929 +bool
   1.930 +TokenStream::putIdentInTokenbuf(const jschar *identStart)
   1.931 +{
   1.932 +    int32_t c, qc;
   1.933 +    const jschar *tmp = userbuf.addressOfNextRawChar();  // position to restore on exit
   1.934 +    userbuf.setAddressOfNextRawChar(identStart);
   1.935 +
   1.936 +    tokenbuf.clear();
   1.937 +    for (;;) {
   1.938 +        c = getCharIgnoreEOL();
   1.939 +        if (!IsIdentifierPart(c)) {
   1.940 +            if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
   1.941 +                break;
   1.942 +            c = qc;  // append the decoded escape instead of the backslash
   1.943 +        }
   1.944 +        if (!tokenbuf.append(c)) {
   1.945 +            userbuf.setAddressOfNextRawChar(tmp);
   1.946 +            return false;
   1.947 +        }
   1.948 +    }
   1.949 +    userbuf.setAddressOfNextRawChar(tmp);
   1.950 +    return true;
   1.951 +}
   1.952 +
   // Check the |length| characters at |s| against the keyword table.
   //
   // Returns true, leaving *ttp untouched, when the text is not a keyword, or when it
   // is a keyword that the current version does not enable (other than TOK_LET).  For
   // an enabled keyword, stores its token kind in *ttp and returns true; if |ttp| is
   // null the keyword is reported as JSMSG_RESERVED_ID instead.  TOK_RESERVED words
   // are always reported through reportError().  TOK_STRICT_RESERVED words, and
   // TOK_LET in versions that do not enable it, go through reportStrictModeError().
   // In every reporting case the return value is the result of the reporting call.
   1.953 +bool
   1.954 +TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp)
   1.955 +{
   1.956 +    const KeywordInfo *kw = FindKeyword(s, length);
   1.957 +    if (!kw)
   1.958 +        return true;
   1.959 +
   1.960 +    if (kw->tokentype == TOK_RESERVED)
   1.961 +        return reportError(JSMSG_RESERVED_ID, kw->chars);
   1.962 +
   1.963 +    if (kw->tokentype != TOK_STRICT_RESERVED) {
   1.964 +        if (kw->version <= versionNumber()) {
   1.965 +            // Working keyword.
   1.966 +            if (ttp) {
   1.967 +                *ttp = kw->tokentype;
   1.968 +                return true;
   1.969 +            }
   1.970 +            return reportError(JSMSG_RESERVED_ID, kw->chars);
   1.971 +        }
   1.972 +
   1.973 +        // The keyword is not in this version. Treat it as an identifier, unless
   1.974 +        // it is let which we treat as TOK_STRICT_RESERVED by falling through to
   1.975 +        // the code below (ES5 forbids it in strict mode).
   1.976 +        if (kw->tokentype != TOK_LET)
   1.977 +            return true;
   1.978 +    }
   1.979 +
   1.980 +    // Strict reserved word.
   1.981 +    return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
   1.982 +}
   1.983 +
   // Classification of a token by its first character, used by getTokenInternal() to
   // choose a scanning path.  Values up to OneChar_Max double as TokenKind values;
   // the named kinds start at TOK_LIMIT so that the two ranges cannot collide.
   1.984 +enum FirstCharKind {
   1.985 +    // A jschar has the 'OneChar' kind if it, by itself, constitutes a valid
   1.986 +    // token that cannot also be a prefix of a longer token.  E.g. ';' has the
   1.987 +    // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
   1.988 +    // that begin with '+'.
   1.989 +    //
   1.990 +    // The few token kinds satisfying these properties cover roughly 35--45%
   1.991 +    // of the tokens seen in practice.
   1.992 +    //
   1.993 +    // We represent the 'OneChar' kind with any positive value less than
   1.994 +    // TOK_LIMIT.  This representation lets us associate each one-char token
   1.995 +    // jschar with a TokenKind and thus avoid a subsequent jschar-to-TokenKind
   1.996 +    // conversion.
   1.997 +    OneChar_Min = 0,
   1.998 +    OneChar_Max = TOK_LIMIT - 1,
   1.999 +
  1.1000 +    Space = TOK_LIMIT,  // whitespace other than a line terminator
  1.1001 +    Ident,              // first character of an identifier
  1.1002 +    Dec,                // '1'..'9': start of a decimal number
  1.1003 +    String,             // quote character opening a string literal
  1.1004 +    EOL,                // '\n' or '\r'
  1.1005 +    BasePrefix,         // '0': may introduce a hex, octal or binary number
  1.1006 +    Other,              // anything not covered by the kinds above
  1.1007 +
  1.1008 +    LastCharKind = Other
  1.1009 +};
  1.1010 +
  1.1011 +// OneChar: 40,  41,  44,  58,  59,  63,  91,  93,  123, 125, 126:
  1.1012 +//          '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
  1.1013 +// Ident:   36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
  1.1014 +// String:  34, 39: '"', '\''
  1.1015 +// Dec:     49..57: '1'..'9'
  1.1016 +// BasePrefix:  48: '0'
  1.1017 +// Space:   9, 11, 12, 32: '\t', '\v', '\f', ' '
  1.1018 +// EOL:     10, 13: '\n', '\r'
  1.1019 +// Other:   every remaining code below 128 (spelled _______ in the table), including
  1.1020 +//          46: '.', 61: '=' and 43: '+', which have no kind of their own.
  1.1021 +//
  1.1022 +// The table is indexed by char code and has one entry for each code 0..127.
  // The T_* macros are short aliases that keep the table columns aligned; they and
  // _______ are undefined again right after the table.
  1.1023 +#define T_COMMA     TOK_COMMA
  1.1024 +#define T_COLON     TOK_COLON
  1.1025 +#define T_BITNOT    TOK_BITNOT
  1.1026 +#define _______ Other
  1.1027 +static const uint8_t firstCharKinds[] = {
  1.1028 +/*         0        1        2        3        4        5        6        7        8        9    */
  1.1029 +/*   0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______,   Space,
  1.1030 +/*  10+ */     EOL,   Space,   Space,     EOL, _______, _______, _______, _______, _______, _______,
  1.1031 +/*  20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
  1.1032 +/*  30+ */ _______, _______,   Space, _______,  String, _______,   Ident, _______, _______,  String,
  1.1033 +/*  40+ */  TOK_LP,  TOK_RP, _______, _______, T_COMMA,_______,  _______, _______,BasePrefix,  Dec,
  1.1034 +/*  50+ */     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,     Dec,    Dec,  T_COLON,TOK_SEMI,
  1.1035 +/*  60+ */ _______, _______, _______,TOK_HOOK, _______,   Ident,   Ident,   Ident,   Ident,   Ident,
  1.1036 +/*  70+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1.1037 +/*  80+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1.1038 +/*  90+ */   Ident,  TOK_LB, _______,  TOK_RB, _______,   Ident, _______,   Ident,   Ident,   Ident,
  1.1039 +/* 100+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1.1040 +/* 110+ */   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,   Ident,
  1.1041 +/* 120+ */   Ident,   Ident,   Ident,  TOK_LC, _______,  TOK_RC,T_BITNOT, _______
  1.1042 +};
  1.1043 +#undef T_COMMA
  1.1044 +#undef T_COLON
  1.1045 +#undef T_BITNOT
  1.1046 +#undef _______
  1.1047 +
  // Every FirstCharKind value, up to LastCharKind, must be representable in the
  // element type of the table.
  1.1048 +static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
  1.1049 +              "Elements of firstCharKinds[] are too small");
  1.1050 +
  1.1051 +TokenKind
  1.1052 +TokenStream::getTokenInternal(Modifier modifier)
  1.1053 +{
  1.1054 +    int c, qc;
  1.1055 +    Token *tp;
  1.1056 +    FirstCharKind c1kind;
  1.1057 +    const jschar *numStart;
  1.1058 +    bool hasExp;
  1.1059 +    DecimalPoint decimalPoint;
  1.1060 +    const jschar *identStart;
  1.1061 +    bool hadUnicodeEscape;
  1.1062 +
  1.1063 +  retry:
  1.1064 +    if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
  1.1065 +        tp = newToken(0);
  1.1066 +        tp->type = TOK_EOF;
  1.1067 +        flags.isEOF = true;
  1.1068 +        goto out;
  1.1069 +    }
  1.1070 +
  1.1071 +    c = userbuf.getRawChar();
  1.1072 +    JS_ASSERT(c != EOF);
  1.1073 +
  1.1074 +    // Chars not in the range 0..127 are rare.  Getting them out of the way
  1.1075 +    // early allows subsequent checking to be faster.
  1.1076 +    if (MOZ_UNLIKELY(c >= 128)) {
  1.1077 +        if (IsSpaceOrBOM2(c)) {
  1.1078 +            if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
  1.1079 +                updateLineInfoForEOL();
  1.1080 +                updateFlagsForEOL();
  1.1081 +            }
  1.1082 +
  1.1083 +            goto retry;
  1.1084 +        }
  1.1085 +
  1.1086 +        tp = newToken(-1);
  1.1087 +
  1.1088 +        // '$' and '_' don't pass IsLetter, but they're < 128 so never appear here.
  1.1089 +        JS_STATIC_ASSERT('$' < 128 && '_' < 128);
  1.1090 +        if (IsLetter(c)) {
  1.1091 +            identStart = userbuf.addressOfNextRawChar() - 1;
  1.1092 +            hadUnicodeEscape = false;
  1.1093 +            goto identifier;
  1.1094 +        }
  1.1095 +
  1.1096 +        goto badchar;
  1.1097 +    }
  1.1098 +
  1.1099 +    // Get the token kind, based on the first char.  The ordering of c1kind
  1.1100 +    // comparison is based on the frequency of tokens in real code -- Parsemark
  1.1101 +    // (which represents typical JS code on the web) and the Unreal demo (which
  1.1102 +    // represents asm.js code).
  1.1103 +    //
  1.1104 +    //                  Parsemark   Unreal
  1.1105 +    //  OneChar         32.9%       39.7%
  1.1106 +    //  Space           25.0%        0.6%
  1.1107 +    //  Ident           19.2%       36.4%
  1.1108 +    //  Dec              7.2%        5.1%
  1.1109 +    //  String           7.9%        0.0%
  1.1110 +    //  EOL              1.7%        0.0%
  1.1111 +    //  BasePrefix       0.4%        4.9%
  1.1112 +    //  Other            5.7%       13.3%
  1.1113 +    //
  1.1114 +    // The ordering is based mostly on Parsemark frequencies, with Unreal
  1.1115 +    // frequencies used to break close categories (e.g. |Dec| and |String|).
  1.1116 +    // |Other| is biggish, but no other token kind is common enough for it to
  1.1117 +    // be worth adding extra values to FirstCharKind.
  1.1118 +    //
  1.1119 +    c1kind = FirstCharKind(firstCharKinds[c]);
  1.1120 +
  1.1121 +    // Look for an unambiguous single-char token.
  1.1122 +    //
  1.1123 +    if (c1kind <= OneChar_Max) {
  1.1124 +        tp = newToken(-1);
  1.1125 +        tp->type = TokenKind(c1kind);
  1.1126 +        goto out;
  1.1127 +    }
  1.1128 +
  1.1129 +    // Skip over non-EOL whitespace chars.
  1.1130 +    //
  1.1131 +    if (c1kind == Space)
  1.1132 +        goto retry;
  1.1133 +
  1.1134 +    // Look for an identifier.
  1.1135 +    //
  1.1136 +    if (c1kind == Ident) {
  1.1137 +        tp = newToken(-1);
  1.1138 +        identStart = userbuf.addressOfNextRawChar() - 1;
  1.1139 +        hadUnicodeEscape = false;
  1.1140 +
  1.1141 +      identifier:
  1.1142 +        for (;;) {
  1.1143 +            c = getCharIgnoreEOL();
  1.1144 +            if (c == EOF)
  1.1145 +                break;
  1.1146 +            if (!IsIdentifierPart(c)) {
  1.1147 +                if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
  1.1148 +                    break;
  1.1149 +                hadUnicodeEscape = true;
  1.1150 +            }
  1.1151 +        }
  1.1152 +        ungetCharIgnoreEOL(c);
  1.1153 +
  1.1154 +        // Identifiers containing no Unicode escapes can be processed directly
  1.1155 +        // from userbuf.  The rest must use the escapes converted via tokenbuf
  1.1156 +        // before atomizing.
  1.1157 +        const jschar *chars;
  1.1158 +        size_t length;
  1.1159 +        if (hadUnicodeEscape) {
  1.1160 +            if (!putIdentInTokenbuf(identStart))
  1.1161 +                goto error;
  1.1162 +
  1.1163 +            chars = tokenbuf.begin();
  1.1164 +            length = tokenbuf.length();
  1.1165 +        } else {
  1.1166 +            chars = identStart;
  1.1167 +            length = userbuf.addressOfNextRawChar() - identStart;
  1.1168 +        }
  1.1169 +
  1.1170 +        // Check for keywords unless the parser told us not to.
  1.1171 +        if (modifier != KeywordIsName) {
  1.1172 +            tp->type = TOK_NAME;
  1.1173 +            if (!checkForKeyword(chars, length, &tp->type))
  1.1174 +                goto error;
  1.1175 +            if (tp->type != TOK_NAME)
  1.1176 +                goto out;
  1.1177 +        }
  1.1178 +
  1.1179 +        JSAtom *atom = AtomizeChars(cx, chars, length);
  1.1180 +        if (!atom)
  1.1181 +            goto error;
  1.1182 +        tp->type = TOK_NAME;
  1.1183 +        tp->setName(atom->asPropertyName());
  1.1184 +        goto out;
  1.1185 +    }
  1.1186 +
  1.1187 +    // Look for a decimal number.
  1.1188 +    //
  1.1189 +    if (c1kind == Dec) {
  1.1190 +        tp = newToken(-1);
  1.1191 +        numStart = userbuf.addressOfNextRawChar() - 1;
  1.1192 +
  1.1193 +      decimal:
  1.1194 +        decimalPoint = NoDecimal;
  1.1195 +        hasExp = false;
  1.1196 +        while (JS7_ISDEC(c))
  1.1197 +            c = getCharIgnoreEOL();
  1.1198 +
  1.1199 +        if (c == '.') {
  1.1200 +            decimalPoint = HasDecimal;
  1.1201 +          decimal_dot:
  1.1202 +            do {
  1.1203 +                c = getCharIgnoreEOL();
  1.1204 +            } while (JS7_ISDEC(c));
  1.1205 +        }
  1.1206 +        if (c == 'e' || c == 'E') {
  1.1207 +            hasExp = true;
  1.1208 +            c = getCharIgnoreEOL();
  1.1209 +            if (c == '+' || c == '-')
  1.1210 +                c = getCharIgnoreEOL();
  1.1211 +            if (!JS7_ISDEC(c)) {
  1.1212 +                ungetCharIgnoreEOL(c);
  1.1213 +                reportError(JSMSG_MISSING_EXPONENT);
  1.1214 +                goto error;
  1.1215 +            }
  1.1216 +            do {
  1.1217 +                c = getCharIgnoreEOL();
  1.1218 +            } while (JS7_ISDEC(c));
  1.1219 +        }
  1.1220 +        ungetCharIgnoreEOL(c);
  1.1221 +
  1.1222 +        if (c != EOF && IsIdentifierStart(c)) {
  1.1223 +            reportError(JSMSG_IDSTART_AFTER_NUMBER);
  1.1224 +            goto error;
  1.1225 +        }
  1.1226 +
  1.1227 +        // Unlike identifiers and strings, numbers cannot contain escaped
  1.1228 +        // chars, so we don't need to use tokenbuf.  Instead we can just
  1.1229 +        // convert the jschars in userbuf directly to the numeric value.
  1.1230 +        double dval;
  1.1231 +        if (!((decimalPoint == HasDecimal) || hasExp)) {
  1.1232 +            if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
  1.1233 +                goto error;
  1.1234 +        } else {
  1.1235 +            const jschar *dummy;
  1.1236 +            if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
  1.1237 +                goto error;
  1.1238 +        }
  1.1239 +        tp->type = TOK_NUMBER;
  1.1240 +        tp->setNumber(dval, decimalPoint);
  1.1241 +        goto out;
  1.1242 +    }
  1.1243 +
  1.1244 +    // Look for a string.
  1.1245 +    //
  1.1246 +    if (c1kind == String) {
  1.1247 +        tp = newToken(-1);
  1.1248 +        qc = c;
  1.1249 +        tokenbuf.clear();
  1.1250 +        while (true) {
  1.1251 +            // We need to detect any of these chars:  " or ', \n (or its
  1.1252 +            // equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
  1.1253 +            // similar to maybeEOL[], see above.  Because we detect EOL
  1.1254 +            // sequences here and put them back immediately, we can use
  1.1255 +            // getCharIgnoreEOL().
  1.1256 +            c = getCharIgnoreEOL();
  1.1257 +            if (maybeStrSpecial[c & 0xff]) {
  1.1258 +                if (c == qc)
  1.1259 +                    break;
  1.1260 +                if (c == '\\') {
  1.1261 +                    switch (c = getChar()) {
  1.1262 +                      case 'b': c = '\b'; break;
  1.1263 +                      case 'f': c = '\f'; break;
  1.1264 +                      case 'n': c = '\n'; break;
  1.1265 +                      case 'r': c = '\r'; break;
  1.1266 +                      case 't': c = '\t'; break;
  1.1267 +                      case 'v': c = '\v'; break;
  1.1268 +
  1.1269 +                      default:
  1.1270 +                        if ('0' <= c && c < '8') {
  1.1271 +                            int32_t val = JS7_UNDEC(c);
  1.1272 +
  1.1273 +                            c = peekChar();
  1.1274 +                            // Strict mode code allows only \0, then a non-digit.
  1.1275 +                            if (val != 0 || JS7_ISDEC(c)) {
  1.1276 +                                if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
  1.1277 +                                    goto error;
  1.1278 +                                flags.sawOctalEscape = true;
  1.1279 +                            }
  1.1280 +                            if ('0' <= c && c < '8') {
  1.1281 +                                val = 8 * val + JS7_UNDEC(c);
  1.1282 +                                getChar();
  1.1283 +                                c = peekChar();
  1.1284 +                                if ('0' <= c && c < '8') {
  1.1285 +                                    int32_t save = val;
  1.1286 +                                    val = 8 * val + JS7_UNDEC(c);
  1.1287 +                                    if (val <= 0377)
  1.1288 +                                        getChar();
  1.1289 +                                    else
  1.1290 +                                        val = save;
  1.1291 +                                }
  1.1292 +                            }
  1.1293 +
  1.1294 +                            c = jschar(val);
  1.1295 +                        } else if (c == 'u') {
  1.1296 +                            jschar cp[4];
  1.1297 +                            if (peekChars(4, cp) &&
  1.1298 +                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
  1.1299 +                                JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
  1.1300 +                                c = (((((JS7_UNHEX(cp[0]) << 4)
  1.1301 +                                        + JS7_UNHEX(cp[1])) << 4)
  1.1302 +                                      + JS7_UNHEX(cp[2])) << 4)
  1.1303 +                                    + JS7_UNHEX(cp[3]);
  1.1304 +                                skipChars(4);
  1.1305 +                            } else {
  1.1306 +                                reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
  1.1307 +                                goto error;
  1.1308 +                            }
  1.1309 +                        } else if (c == 'x') {
  1.1310 +                            jschar cp[2];
  1.1311 +                            if (peekChars(2, cp) &&
  1.1312 +                                JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
  1.1313 +                                c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
  1.1314 +                                skipChars(2);
  1.1315 +                            } else {
  1.1316 +                                reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
  1.1317 +                                goto error;
  1.1318 +                            }
  1.1319 +                        } else if (c == '\n') {
  1.1320 +                            // ES5 7.8.4: an escaped line terminator represents
  1.1321 +                            // no character.
  1.1322 +                            continue;
  1.1323 +                        }
  1.1324 +                        break;
  1.1325 +                    }
  1.1326 +                } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
  1.1327 +                    ungetCharIgnoreEOL(c);
  1.1328 +                    reportError(JSMSG_UNTERMINATED_STRING);
  1.1329 +                    goto error;
  1.1330 +                }
  1.1331 +            }
  1.1332 +            if (!tokenbuf.append(c))
  1.1333 +                goto error;
  1.1334 +        }
  1.1335 +        JSAtom *atom = atomize(cx, tokenbuf);
  1.1336 +        if (!atom)
  1.1337 +            goto error;
  1.1338 +        tp->type = TOK_STRING;
  1.1339 +        tp->setAtom(atom);
  1.1340 +        goto out;
  1.1341 +    }
  1.1342 +
  1.1343 +    // Skip over EOL chars, updating line state along the way.
  1.1344 +    //
  1.1345 +    if (c1kind == EOL) {
  1.1346 +        // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
  1.1347 +        if (c == '\r' && userbuf.hasRawChars())
  1.1348 +            userbuf.matchRawChar('\n');
  1.1349 +        updateLineInfoForEOL();
  1.1350 +        updateFlagsForEOL();
  1.1351 +        goto retry;
  1.1352 +    }
  1.1353 +
  1.1354 +    // Look for a hexadecimal, octal, or binary number.
  1.1355 +    //
  1.1356 +    if (c1kind == BasePrefix) {
  1.1357 +        tp = newToken(-1);
  1.1358 +        int radix;
  1.1359 +        c = getCharIgnoreEOL();
  1.1360 +        if (c == 'x' || c == 'X') {
  1.1361 +            radix = 16;
  1.1362 +            c = getCharIgnoreEOL();
  1.1363 +            if (!JS7_ISHEX(c)) {
  1.1364 +                ungetCharIgnoreEOL(c);
  1.1365 +                reportError(JSMSG_MISSING_HEXDIGITS);
  1.1366 +                goto error;
  1.1367 +            }
  1.1368 +            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0x'
  1.1369 +            while (JS7_ISHEX(c))
  1.1370 +                c = getCharIgnoreEOL();
  1.1371 +        } else if (c == 'b' || c == 'B') {
  1.1372 +            radix = 2;
  1.1373 +            c = getCharIgnoreEOL();
  1.1374 +            if (c != '0' && c != '1') {
  1.1375 +                ungetCharIgnoreEOL(c);
  1.1376 +                reportError(JSMSG_MISSING_BINARY_DIGITS);
  1.1377 +                goto error;
  1.1378 +            }
  1.1379 +            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0b'
  1.1380 +            while (c == '0' || c == '1')
  1.1381 +                c = getCharIgnoreEOL();
  1.1382 +        } else if (c == 'o' || c == 'O') {
  1.1383 +            radix = 8;
  1.1384 +            c = getCharIgnoreEOL();
  1.1385 +            if (c < '0' || c > '7') {
  1.1386 +                ungetCharIgnoreEOL(c);
  1.1387 +                reportError(JSMSG_MISSING_OCTAL_DIGITS);
  1.1388 +                goto error;
  1.1389 +            }
  1.1390 +            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0o'
  1.1391 +            while ('0' <= c && c <= '7')
  1.1392 +                c = getCharIgnoreEOL();
  1.1393 +        } else if (JS7_ISDEC(c)) {
  1.1394 +            radix = 8;
  1.1395 +            numStart = userbuf.addressOfNextRawChar() - 1;  // one past the '0'
  1.1396 +            while (JS7_ISDEC(c)) {
  1.1397 +                // Octal integer literals are not permitted in strict mode code.
  1.1398 +                if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
  1.1399 +                    goto error;
  1.1400 +
  1.1401 +                // Outside strict mode, we permit 08 and 09 as decimal numbers,
  1.1402 +                // which makes our behaviour a superset of the ECMA numeric
  1.1403 +                // grammar. We might not always be so permissive, so we warn
  1.1404 +                // about it.
  1.1405 +                if (c >= '8') {
  1.1406 +                    if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
  1.1407 +                        goto error;
  1.1408 +                    }
  1.1409 +                    goto decimal;   // use the decimal scanner for the rest of the number
  1.1410 +                }
  1.1411 +                c = getCharIgnoreEOL();
  1.1412 +            }
  1.1413 +        } else {
  1.1414 +            // '0' not followed by [xXbBoO] or a digit;  scan as a decimal number.
  1.1415 +            numStart = userbuf.addressOfNextRawChar() - 1;
  1.1416 +            goto decimal;
  1.1417 +        }
  1.1418 +        ungetCharIgnoreEOL(c);
  1.1419 +
  1.1420 +        if (c != EOF && IsIdentifierStart(c)) {
  1.1421 +            reportError(JSMSG_IDSTART_AFTER_NUMBER);
  1.1422 +            goto error;
  1.1423 +        }
  1.1424 +
  1.1425 +        double dval;
  1.1426 +        const jschar *dummy;
  1.1427 +        if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
  1.1428 +            goto error;
  1.1429 +        tp->type = TOK_NUMBER;
  1.1430 +        tp->setNumber(dval, NoDecimal);
  1.1431 +        goto out;
  1.1432 +    }
  1.1433 +
  1.1434 +    // This handles everything else.
  1.1435 +    //
  1.1436 +    JS_ASSERT(c1kind == Other);
  1.1437 +    tp = newToken(-1);
  1.1438 +    switch (c) {
  1.1439 +      case '.':
  1.1440 +        c = getCharIgnoreEOL();
  1.1441 +        if (JS7_ISDEC(c)) {
  1.1442 +            numStart = userbuf.addressOfNextRawChar() - 2;
  1.1443 +            decimalPoint = HasDecimal;
  1.1444 +            hasExp = false;
  1.1445 +            goto decimal_dot;
  1.1446 +        }
  1.1447 +        if (c == '.') {
  1.1448 +            if (matchChar('.')) {
  1.1449 +                tp->type = TOK_TRIPLEDOT;
  1.1450 +                goto out;
  1.1451 +            }
  1.1452 +        }
  1.1453 +        ungetCharIgnoreEOL(c);
  1.1454 +        tp->type = TOK_DOT;
  1.1455 +        goto out;
  1.1456 +
  1.1457 +      case '=':
  1.1458 +        if (matchChar('='))
  1.1459 +            tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
  1.1460 +        else if (matchChar('>'))
  1.1461 +            tp->type = TOK_ARROW;
  1.1462 +        else
  1.1463 +            tp->type = TOK_ASSIGN;
  1.1464 +        goto out;
  1.1465 +
  1.1466 +      case '+':
  1.1467 +        if (matchChar('+'))
  1.1468 +            tp->type = TOK_INC;
  1.1469 +        else
  1.1470 +            tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
  1.1471 +        goto out;
  1.1472 +
  1.1473 +      case '\\':
  1.1474 +        hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
  1.1475 +        if (hadUnicodeEscape) {
  1.1476 +            identStart = userbuf.addressOfNextRawChar() - 6;
  1.1477 +            goto identifier;
  1.1478 +        }
  1.1479 +        goto badchar;
  1.1480 +
  1.1481 +      case '|':
  1.1482 +        if (matchChar('|'))
  1.1483 +            tp->type = TOK_OR;
  1.1484 +        else
  1.1485 +            tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
  1.1486 +        goto out;
  1.1487 +
  1.1488 +      case '^':
  1.1489 +        tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
  1.1490 +        goto out;
  1.1491 +
  1.1492 +      case '&':
  1.1493 +        if (matchChar('&'))
  1.1494 +            tp->type = TOK_AND;
  1.1495 +        else
  1.1496 +            tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
  1.1497 +        goto out;
  1.1498 +
  1.1499 +      case '!':
  1.1500 +        if (matchChar('='))
  1.1501 +            tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
  1.1502 +        else
  1.1503 +            tp->type = TOK_NOT;
  1.1504 +        goto out;
  1.1505 +
  1.1506 +      case '<':
  1.1507 +        // NB: treat HTML begin-comment as comment-till-end-of-line.
  1.1508 +        if (matchChar('!')) {
  1.1509 +            if (matchChar('-')) {
  1.1510 +                if (matchChar('-'))
  1.1511 +                    goto skipline;
  1.1512 +                ungetChar('-');
  1.1513 +            }
  1.1514 +            ungetChar('!');
  1.1515 +        }
  1.1516 +        if (matchChar('<')) {
  1.1517 +            tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
  1.1518 +        } else {
  1.1519 +            tp->type = matchChar('=') ? TOK_LE : TOK_LT;
  1.1520 +        }
  1.1521 +        goto out;
  1.1522 +
  1.1523 +      case '>':
  1.1524 +        if (matchChar('>')) {
  1.1525 +            if (matchChar('>'))
  1.1526 +                tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
  1.1527 +            else
  1.1528 +                tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
  1.1529 +        } else {
  1.1530 +            tp->type = matchChar('=') ? TOK_GE : TOK_GT;
  1.1531 +        }
  1.1532 +        goto out;
  1.1533 +
  1.1534 +      case '*':
  1.1535 +        tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
  1.1536 +        goto out;
  1.1537 +
  1.1538 +      case '/':
  1.1539 +        // Look for a single-line comment.
  1.1540 +        if (matchChar('/')) {
  1.1541 +            c = peekChar();
  1.1542 +            if (c == '@' || c == '#') {
  1.1543 +                bool shouldWarn = getChar() == '@';
  1.1544 +                if (!getDirectives(false, shouldWarn))
  1.1545 +                    goto error;
  1.1546 +            }
  1.1547 +
  1.1548 +        skipline:
  1.1549 +            while ((c = getChar()) != EOF && c != '\n')
  1.1550 +                continue;
  1.1551 +            ungetChar(c);
  1.1552 +            cursor = (cursor - 1) & ntokensMask;
  1.1553 +            goto retry;
  1.1554 +        }
  1.1555 +
  1.1556 +        // Look for a multi-line comment.
  1.1557 +        if (matchChar('*')) {
  1.1558 +            unsigned linenoBefore = lineno;
  1.1559 +            while ((c = getChar()) != EOF &&
  1.1560 +                   !(c == '*' && matchChar('/'))) {
  1.1561 +                if (c == '@' || c == '#') {
  1.1562 +                    bool shouldWarn = c == '@';
  1.1563 +                    if (!getDirectives(true, shouldWarn))
  1.1564 +                        goto error;
  1.1565 +                }
  1.1566 +            }
  1.1567 +            if (c == EOF) {
  1.1568 +                reportError(JSMSG_UNTERMINATED_COMMENT);
  1.1569 +                goto error;
  1.1570 +            }
  1.1571 +            if (linenoBefore != lineno)
  1.1572 +                updateFlagsForEOL();
  1.1573 +            cursor = (cursor - 1) & ntokensMask;
  1.1574 +            goto retry;
  1.1575 +        }
  1.1576 +
  1.1577 +        // Look for a regexp.
  1.1578 +        if (modifier == Operand) {
  1.1579 +            tokenbuf.clear();
  1.1580 +
  1.1581 +            bool inCharClass = false;
  1.1582 +            for (;;) {
  1.1583 +                c = getChar();
  1.1584 +                if (c == '\\') {
  1.1585 +                    if (!tokenbuf.append(c))
  1.1586 +                        goto error;
  1.1587 +                    c = getChar();
  1.1588 +                } else if (c == '[') {
  1.1589 +                    inCharClass = true;
  1.1590 +                } else if (c == ']') {
  1.1591 +                    inCharClass = false;
  1.1592 +                } else if (c == '/' && !inCharClass) {
  1.1593 +                    // For compat with IE, allow unescaped / in char classes.
  1.1594 +                    break;
  1.1595 +                }
  1.1596 +                if (c == '\n' || c == EOF) {
  1.1597 +                    ungetChar(c);
  1.1598 +                    reportError(JSMSG_UNTERMINATED_REGEXP);
  1.1599 +                    goto error;
  1.1600 +                }
  1.1601 +                if (!tokenbuf.append(c))
  1.1602 +                    goto error;
  1.1603 +            }
  1.1604 +
  1.1605 +            RegExpFlag reflags = NoFlags;
  1.1606 +            unsigned length = tokenbuf.length() + 1;
  1.1607 +            while (true) {
  1.1608 +                c = peekChar();
  1.1609 +                if (c == 'g' && !(reflags & GlobalFlag))
  1.1610 +                    reflags = RegExpFlag(reflags | GlobalFlag);
  1.1611 +                else if (c == 'i' && !(reflags & IgnoreCaseFlag))
  1.1612 +                    reflags = RegExpFlag(reflags | IgnoreCaseFlag);
  1.1613 +                else if (c == 'm' && !(reflags & MultilineFlag))
  1.1614 +                    reflags = RegExpFlag(reflags | MultilineFlag);
  1.1615 +                else if (c == 'y' && !(reflags & StickyFlag))
  1.1616 +                    reflags = RegExpFlag(reflags | StickyFlag);
  1.1617 +                else
  1.1618 +                    break;
  1.1619 +                getChar();
  1.1620 +                length++;
  1.1621 +            }
  1.1622 +
  1.1623 +            c = peekChar();
  1.1624 +            if (JS7_ISLET(c)) {
  1.1625 +                char buf[2] = { '\0', '\0' };
  1.1626 +                tp->pos.begin += length + 1;
  1.1627 +                buf[0] = char(c);
  1.1628 +                reportError(JSMSG_BAD_REGEXP_FLAG, buf);
  1.1629 +                (void) getChar();
  1.1630 +                goto error;
  1.1631 +            }
  1.1632 +            tp->type = TOK_REGEXP;
  1.1633 +            tp->setRegExpFlags(reflags);
  1.1634 +            goto out;
  1.1635 +        }
  1.1636 +
  1.1637 +        tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
  1.1638 +        goto out;
  1.1639 +
  1.1640 +      case '%':
  1.1641 +        tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
  1.1642 +        goto out;
  1.1643 +
  1.1644 +      case '-':
  1.1645 +        if (matchChar('-')) {
  1.1646 +            if (peekChar() == '>' && !flags.isDirtyLine)
  1.1647 +                goto skipline;
  1.1648 +            tp->type = TOK_DEC;
  1.1649 +        } else {
  1.1650 +            tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
  1.1651 +        }
  1.1652 +        goto out;
  1.1653 +
  1.1654 +      badchar:
  1.1655 +      default:
  1.1656 +        reportError(JSMSG_ILLEGAL_CHARACTER);
  1.1657 +        goto error;
  1.1658 +    }
  1.1659 +
  1.1660 +    MOZ_ASSUME_UNREACHABLE("should have jumped to |out| or |error|");
  1.1661 +
  1.1662 +  out:
  1.1663 +    flags.isDirtyLine = true;
  1.1664 +    tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
  1.1665 +    JS_ASSERT(IsTokenSane(tp));
  1.1666 +    return tp->type;
  1.1667 +
  1.1668 +  error:
  1.1669 +    flags.isDirtyLine = true;
  1.1670 +    tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
  1.1671 +    tp->type = TOK_ERROR;
  1.1672 +    JS_ASSERT(IsTokenSane(tp));
  1.1673 +    onError();
  1.1674 +    return TOK_ERROR;
  1.1675 +}
  1.1676 +
  1.1677 +void
  1.1678 +TokenStream::onError()
  1.1679 +{
  1.1680 +    flags.hadError = true;
  1.1681 +#ifdef DEBUG
  1.1682 +    // Poisoning userbuf on error establishes an invariant: once an erroneous
  1.1683 +    // token has been seen, userbuf will not be consulted again.  This is true
  1.1684 +    // because the parser will either (a) deal with the TOK_ERROR token by
  1.1685 +    // aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
  1.1686 +    // match what it expected, it will unget the token, and the next getToken()
  1.1687 +    // call will immediately return the just-gotten TOK_ERROR token again
  1.1688 +    // without consulting userbuf, thanks to the lookahead buffer.
  1.1689 +    userbuf.poison();
  1.1690 +#endif
  1.1691 +}
  1.1692 +
  1.1693 +JS_FRIEND_API(int)
  1.1694 +js_fgets(char *buf, int size, FILE *file)
  1.1695 +{
  1.1696 +    int n, i, c;
  1.1697 +    bool crflag;
  1.1698 +
  1.1699 +    n = size - 1;
  1.1700 +    if (n < 0)
  1.1701 +        return -1;
  1.1702 +
  1.1703 +    crflag = false;
  1.1704 +    for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
  1.1705 +        buf[i] = c;
  1.1706 +        if (c == '\n') {        // any \n ends a line
  1.1707 +            i++;                // keep the \n; we know there is room for \0
  1.1708 +            break;
  1.1709 +        }
  1.1710 +        if (crflag) {           // \r not followed by \n ends line at the \r
  1.1711 +            ungetc(c, file);
  1.1712 +            break;              // and overwrite c in buf with \0
  1.1713 +        }
  1.1714 +        crflag = (c == '\r');
  1.1715 +    }
  1.1716 +
  1.1717 +    buf[i] = '\0';
  1.1718 +    return i;
  1.1719 +}
  1.1720 +
  #ifdef DEBUG
  // Expands to a switch case that returns the enumerator's own spelling as a
  // string literal, e.g. TOKEN_KIND_NAME_CASE(TOK_EOF) yields
  // |case TOK_EOF: return "TOK_EOF";|.
  #define TOKEN_KIND_NAME_CASE(kind) case kind: return #kind;

  // Debug-only helper: returns the name of the TokenKind enumerator |tt| as a
  // C string.  TOK_LIMIT, and any value that has no case below, yields
  // "<bad TokenKind>".
  const char *
  TokenKindToString(TokenKind tt)
  {
      switch (tt) {
        TOKEN_KIND_NAME_CASE(TOK_ERROR)
        TOKEN_KIND_NAME_CASE(TOK_EOF)
        TOKEN_KIND_NAME_CASE(TOK_EOL)
        TOKEN_KIND_NAME_CASE(TOK_SEMI)
        TOKEN_KIND_NAME_CASE(TOK_COMMA)
        TOKEN_KIND_NAME_CASE(TOK_HOOK)
        TOKEN_KIND_NAME_CASE(TOK_COLON)
        TOKEN_KIND_NAME_CASE(TOK_OR)
        TOKEN_KIND_NAME_CASE(TOK_AND)
        TOKEN_KIND_NAME_CASE(TOK_BITOR)
        TOKEN_KIND_NAME_CASE(TOK_BITXOR)
        TOKEN_KIND_NAME_CASE(TOK_BITAND)
        TOKEN_KIND_NAME_CASE(TOK_ADD)
        TOKEN_KIND_NAME_CASE(TOK_SUB)
        TOKEN_KIND_NAME_CASE(TOK_MUL)
        TOKEN_KIND_NAME_CASE(TOK_DIV)
        TOKEN_KIND_NAME_CASE(TOK_MOD)
        TOKEN_KIND_NAME_CASE(TOK_INC)
        TOKEN_KIND_NAME_CASE(TOK_DEC)
        TOKEN_KIND_NAME_CASE(TOK_DOT)
        TOKEN_KIND_NAME_CASE(TOK_TRIPLEDOT)
        TOKEN_KIND_NAME_CASE(TOK_LB)
        TOKEN_KIND_NAME_CASE(TOK_RB)
        TOKEN_KIND_NAME_CASE(TOK_LC)
        TOKEN_KIND_NAME_CASE(TOK_RC)
        TOKEN_KIND_NAME_CASE(TOK_LP)
        TOKEN_KIND_NAME_CASE(TOK_RP)
        TOKEN_KIND_NAME_CASE(TOK_ARROW)
        TOKEN_KIND_NAME_CASE(TOK_NAME)
        TOKEN_KIND_NAME_CASE(TOK_NUMBER)
        TOKEN_KIND_NAME_CASE(TOK_STRING)
        TOKEN_KIND_NAME_CASE(TOK_REGEXP)
        TOKEN_KIND_NAME_CASE(TOK_TRUE)
        TOKEN_KIND_NAME_CASE(TOK_FALSE)
        TOKEN_KIND_NAME_CASE(TOK_NULL)
        TOKEN_KIND_NAME_CASE(TOK_THIS)
        TOKEN_KIND_NAME_CASE(TOK_FUNCTION)
        TOKEN_KIND_NAME_CASE(TOK_IF)
        TOKEN_KIND_NAME_CASE(TOK_ELSE)
        TOKEN_KIND_NAME_CASE(TOK_SWITCH)
        TOKEN_KIND_NAME_CASE(TOK_CASE)
        TOKEN_KIND_NAME_CASE(TOK_DEFAULT)
        TOKEN_KIND_NAME_CASE(TOK_WHILE)
        TOKEN_KIND_NAME_CASE(TOK_DO)
        TOKEN_KIND_NAME_CASE(TOK_FOR)
        TOKEN_KIND_NAME_CASE(TOK_BREAK)
        TOKEN_KIND_NAME_CASE(TOK_CONTINUE)
        TOKEN_KIND_NAME_CASE(TOK_IN)
        TOKEN_KIND_NAME_CASE(TOK_VAR)
        TOKEN_KIND_NAME_CASE(TOK_CONST)
        TOKEN_KIND_NAME_CASE(TOK_WITH)
        TOKEN_KIND_NAME_CASE(TOK_RETURN)
        TOKEN_KIND_NAME_CASE(TOK_NEW)
        TOKEN_KIND_NAME_CASE(TOK_DELETE)
        TOKEN_KIND_NAME_CASE(TOK_TRY)
        TOKEN_KIND_NAME_CASE(TOK_CATCH)
        TOKEN_KIND_NAME_CASE(TOK_FINALLY)
        TOKEN_KIND_NAME_CASE(TOK_THROW)
        TOKEN_KIND_NAME_CASE(TOK_INSTANCEOF)
        TOKEN_KIND_NAME_CASE(TOK_DEBUGGER)
        TOKEN_KIND_NAME_CASE(TOK_YIELD)
        TOKEN_KIND_NAME_CASE(TOK_LET)
        TOKEN_KIND_NAME_CASE(TOK_RESERVED)
        TOKEN_KIND_NAME_CASE(TOK_STRICT_RESERVED)
        TOKEN_KIND_NAME_CASE(TOK_STRICTEQ)
        TOKEN_KIND_NAME_CASE(TOK_EQ)
        TOKEN_KIND_NAME_CASE(TOK_STRICTNE)
        TOKEN_KIND_NAME_CASE(TOK_NE)
        TOKEN_KIND_NAME_CASE(TOK_TYPEOF)
        TOKEN_KIND_NAME_CASE(TOK_VOID)
        TOKEN_KIND_NAME_CASE(TOK_NOT)
        TOKEN_KIND_NAME_CASE(TOK_BITNOT)
        TOKEN_KIND_NAME_CASE(TOK_LT)
        TOKEN_KIND_NAME_CASE(TOK_LE)
        TOKEN_KIND_NAME_CASE(TOK_GT)
        TOKEN_KIND_NAME_CASE(TOK_GE)
        TOKEN_KIND_NAME_CASE(TOK_LSH)
        TOKEN_KIND_NAME_CASE(TOK_RSH)
        TOKEN_KIND_NAME_CASE(TOK_URSH)
        TOKEN_KIND_NAME_CASE(TOK_ASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_ADDASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_SUBASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_BITORASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_BITXORASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_BITANDASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_LSHASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_RSHASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_URSHASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_MULASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_DIVASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_MODASSIGN)
        TOKEN_KIND_NAME_CASE(TOK_EXPORT)
        TOKEN_KIND_NAME_CASE(TOK_IMPORT)

        // TOK_LIMIT has no printable name of its own; it is listed explicitly
        // and falls through to the fallback string below.
        case TOK_LIMIT:
          break;
      }

      return "<bad TokenKind>";
  }

  #undef TOKEN_KIND_NAME_CASE
  #endif

mercurial