js/src/frontend/TokenStream.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/js/src/frontend/TokenStream.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,914 @@
     1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
     1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99:
     1.6 + * This Source Code Form is subject to the terms of the Mozilla Public
     1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.9 +
    1.10 +#ifndef frontend_TokenStream_h
    1.11 +#define frontend_TokenStream_h
    1.12 +
    1.13 +// JS lexical scanner interface.
    1.14 +
    1.15 +#include "mozilla/DebugOnly.h"
    1.16 +#include "mozilla/PodOperations.h"
    1.17 +
    1.18 +#include <stdarg.h>
    1.19 +#include <stddef.h>
    1.20 +#include <stdio.h>
    1.21 +
    1.22 +#include "jscntxt.h"
    1.23 +#include "jspubtd.h"
    1.24 +
    1.25 +#include "js/Vector.h"
    1.26 +#include "vm/RegExpObject.h"
    1.27 +
    1.28 +namespace js {
    1.29 +namespace frontend {
    1.30 +
    1.31 +// Values of this type are used to index into arrays such as isExprEnding[],
    1.32 +// so the first value must be zero.
    1.33 +enum TokenKind {
    1.34 +    TOK_ERROR = 0,                 // well-known as the only code < EOF
    1.35 +    TOK_EOF,                       // end of file
    1.36 +    TOK_EOL,                       // end of line; only returned by peekTokenSameLine()
    1.37 +    TOK_SEMI,                      // semicolon
    1.38 +    TOK_COMMA,                     // comma operator
    1.39 +    TOK_HOOK, TOK_COLON,           // conditional (?:)
    1.40 +    TOK_INC, TOK_DEC,              // increment/decrement (++ --)
    1.41 +    TOK_DOT,                       // member operator (.)
    1.42 +    TOK_TRIPLEDOT,                 // for rest arguments (...)
    1.43 +    TOK_LB, TOK_RB,                // left and right brackets
    1.44 +    TOK_LC, TOK_RC,                // left and right curlies (braces)
    1.45 +    TOK_LP, TOK_RP,                // left and right parentheses
    1.46 +    TOK_NAME,                      // identifier
    1.47 +    TOK_NUMBER,                    // numeric constant
    1.48 +    TOK_STRING,                    // string constant
    1.49 +    TOK_REGEXP,                    // RegExp constant
    1.50 +    TOK_TRUE,                      // true
    1.51 +    TOK_FALSE,                     // false
    1.52 +    TOK_NULL,                      // null
    1.53 +    TOK_THIS,                      // this
    1.54 +    TOK_FUNCTION,                  // function keyword
    1.55 +    TOK_IF,                        // if keyword
    1.56 +    TOK_ELSE,                      // else keyword
    1.57 +    TOK_SWITCH,                    // switch keyword
    1.58 +    TOK_CASE,                      // case keyword
    1.59 +    TOK_DEFAULT,                   // default keyword
    1.60 +    TOK_WHILE,                     // while keyword
    1.61 +    TOK_DO,                        // do keyword
    1.62 +    TOK_FOR,                       // for keyword
    1.63 +    TOK_BREAK,                     // break keyword
    1.64 +    TOK_CONTINUE,                  // continue keyword
    1.65 +    TOK_VAR,                       // var keyword
    1.66 +    TOK_CONST,                     // const keyword
    1.67 +    TOK_WITH,                      // with keyword
    1.68 +    TOK_RETURN,                    // return keyword
    1.69 +    TOK_NEW,                       // new keyword
    1.70 +    TOK_DELETE,                    // delete keyword
    1.71 +    TOK_TRY,                       // try keyword
    1.72 +    TOK_CATCH,                     // catch keyword
    1.73 +    TOK_FINALLY,                   // finally keyword
    1.74 +    TOK_THROW,                     // throw keyword
    1.75 +    TOK_DEBUGGER,                  // debugger keyword
    1.76 +    TOK_YIELD,                     // yield from generator function
    1.77 +    TOK_LET,                       // let keyword
    1.78 +    TOK_EXPORT,                    // export keyword
    1.79 +    TOK_IMPORT,                    // import keyword
    1.80 +    TOK_RESERVED,                  // reserved keywords
    1.81 +    TOK_STRICT_RESERVED,           // reserved keywords in strict mode
    1.82 +
    1.83 +    // The following token types occupy contiguous ranges to enable easy
    1.84 +    // range-testing.
    1.85 +
    1.86 +    // Binary operator tokens, TOK_OR thru TOK_MOD. These must be in the same
    1.87 +    // order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h.
    1.88 +    TOK_OR,                        // logical or (||)
    1.89 +    TOK_BINOP_FIRST = TOK_OR,
    1.90 +    TOK_AND,                       // logical and (&&)
    1.91 +    TOK_BITOR,                     // bitwise-or (|)
    1.92 +    TOK_BITXOR,                    // bitwise-xor (^)
    1.93 +    TOK_BITAND,                    // bitwise-and (&)
    1.94 +
    1.95 +    // Equality operation tokens, per TokenKindIsEquality.
    1.96 +    TOK_STRICTEQ,
    1.97 +    TOK_EQUALITY_START = TOK_STRICTEQ,
    1.98 +    TOK_EQ,
    1.99 +    TOK_STRICTNE,
   1.100 +    TOK_NE,
   1.101 +    TOK_EQUALITY_LAST = TOK_NE,
   1.102 +
   1.103 +    // Relational ops (< <= > >=), per TokenKindIsRelational.
   1.104 +    TOK_LT,
   1.105 +    TOK_RELOP_START = TOK_LT,
   1.106 +    TOK_LE,
   1.107 +    TOK_GT,
   1.108 +    TOK_GE,
   1.109 +    TOK_RELOP_LAST = TOK_GE,
   1.110 +
   1.111 +    TOK_INSTANCEOF,                // |instanceof| keyword
   1.112 +    TOK_IN,                        // |in| keyword
   1.113 +
   1.114 +    // Shift ops (<< >> >>>), per TokenKindIsShift.
   1.115 +    TOK_LSH,
   1.116 +    TOK_SHIFTOP_START = TOK_LSH,
   1.117 +    TOK_RSH,
   1.118 +    TOK_URSH,
   1.119 +    TOK_SHIFTOP_LAST = TOK_URSH,
   1.120 +
   1.121 +    TOK_ADD,
   1.122 +    TOK_SUB,
   1.123 +    TOK_MUL,
   1.124 +    TOK_DIV,
   1.125 +    TOK_MOD,
   1.126 +    TOK_BINOP_LAST = TOK_MOD,
   1.127 +
   1.128 +    // Unary operation tokens.
   1.129 +    TOK_TYPEOF,
   1.130 +    TOK_VOID,
   1.131 +    TOK_NOT,
   1.132 +    TOK_BITNOT,
   1.133 +
   1.134 +    TOK_ARROW,                     // function arrow (=>)
   1.135 +
   1.136 +    // Assignment ops (= += -= etc.), per TokenKindIsAssignment
   1.137 +    TOK_ASSIGN,
   1.138 +    TOK_ASSIGNMENT_START = TOK_ASSIGN,
   1.139 +    TOK_ADDASSIGN,
   1.140 +    TOK_SUBASSIGN,
   1.141 +    TOK_BITORASSIGN,
   1.142 +    TOK_BITXORASSIGN,
   1.143 +    TOK_BITANDASSIGN,
   1.144 +    TOK_LSHASSIGN,
   1.145 +    TOK_RSHASSIGN,
   1.146 +    TOK_URSHASSIGN,
   1.147 +    TOK_MULASSIGN,
   1.148 +    TOK_DIVASSIGN,
   1.149 +    TOK_MODASSIGN,
   1.150 +    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,
   1.151 +
   1.152 +    TOK_LIMIT                      // domain size
   1.153 +};
   1.154 +
   1.155 +inline bool
   1.156 +TokenKindIsBinaryOp(TokenKind tt)
   1.157 +{
   1.158 +    return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST;
   1.159 +}
   1.160 +
   1.161 +inline bool
   1.162 +TokenKindIsEquality(TokenKind tt)
   1.163 +{
   1.164 +    return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST;
   1.165 +}
   1.166 +
   1.167 +inline bool
   1.168 +TokenKindIsRelational(TokenKind tt)
   1.169 +{
   1.170 +    return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST;
   1.171 +}
   1.172 +
   1.173 +inline bool
   1.174 +TokenKindIsShift(TokenKind tt)
   1.175 +{
   1.176 +    return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST;
   1.177 +}
   1.178 +
   1.179 +inline bool
   1.180 +TokenKindIsAssignment(TokenKind tt)
   1.181 +{
   1.182 +    return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST;
   1.183 +}
   1.184 +
   1.185 +inline bool
   1.186 +TokenKindIsDecl(TokenKind tt)
   1.187 +{
   1.188 +    return tt == TOK_VAR || tt == TOK_LET;
   1.189 +}
   1.190 +
   1.191 +struct TokenPos {
   1.192 +    uint32_t    begin;  // Offset of the token's first char.
   1.193 +    uint32_t    end;    // Offset of 1 past the token's last char.
   1.194 +
   1.195 +    TokenPos() {}
   1.196 +    TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}
   1.197 +
   1.198 +    // Return a TokenPos that covers left, right, and anything in between.
   1.199 +    static TokenPos box(const TokenPos &left, const TokenPos &right) {
   1.200 +        JS_ASSERT(left.begin <= left.end);
   1.201 +        JS_ASSERT(left.end <= right.begin);
   1.202 +        JS_ASSERT(right.begin <= right.end);
   1.203 +        return TokenPos(left.begin, right.end);
   1.204 +    }
   1.205 +
   1.206 +    bool operator==(const TokenPos& bpos) const {
   1.207 +        return begin == bpos.begin && end == bpos.end;
   1.208 +    }
   1.209 +
   1.210 +    bool operator!=(const TokenPos& bpos) const {
   1.211 +        return begin != bpos.begin || end != bpos.end;
   1.212 +    }
   1.213 +
   1.214 +    bool operator <(const TokenPos& bpos) const {
   1.215 +        return begin < bpos.begin;
   1.216 +    }
   1.217 +
   1.218 +    bool operator <=(const TokenPos& bpos) const {
   1.219 +        return begin <= bpos.begin;
   1.220 +    }
   1.221 +
   1.222 +    bool operator >(const TokenPos& bpos) const {
   1.223 +        return !(*this <= bpos);
   1.224 +    }
   1.225 +
   1.226 +    bool operator >=(const TokenPos& bpos) const {
   1.227 +        return !(*this < bpos);
   1.228 +    }
   1.229 +
   1.230 +    bool encloses(const TokenPos& pos) const {
   1.231 +        return begin <= pos.begin && pos.end <= end;
   1.232 +    }
   1.233 +};
   1.234 +
   1.235 +enum DecimalPoint { NoDecimal = false, HasDecimal = true };
   1.236 +
   1.237 +struct Token
   1.238 +{
   1.239 +    TokenKind           type;           // char value or above enumerator
   1.240 +    TokenPos            pos;            // token position in file
   1.241 +    union {
   1.242 +      private:
   1.243 +        friend struct Token;
   1.244 +        PropertyName    *name;          // non-numeric atom
   1.245 +        JSAtom          *atom;          // potentially-numeric atom
   1.246 +        struct {
   1.247 +            double      value;          // floating point number
   1.248 +            DecimalPoint decimalPoint;  // literal contains '.'
   1.249 +        } number;
   1.250 +        RegExpFlag      reflags;        // regexp flags; use tokenbuf to access
   1.251 +                                        //   regexp chars
   1.252 +    } u;
   1.253 +
   1.254 +    // This constructor is necessary only for MSVC 2013 and how it compiles the
   1.255 +    // initialization of TokenStream::tokens.  That field is initialized as
   1.256 +    // tokens() in the constructor init-list.  This *should* zero the entire
   1.257 +    // array, then (because Token has a non-trivial constructor, because
   1.258 +    // TokenPos has a user-provided constructor) call the implicit Token
   1.259 +    // constructor on each element, which would call the TokenPos constructor
   1.260 +    // for Token::pos and do nothing.  (All of which is equivalent to just
   1.261 +    // zeroing TokenStream::tokens.)  But MSVC 2013 (2010/2012 don't have this
   1.262 +    // bug) doesn't zero out each element, so we need this extra constructor to
   1.263 +    // make it do the right thing.  (Token is used primarily by reference or
   1.264 +    // pointer, and it's only initialized a very few places, so having a
   1.265 +    // user-defined constructor won't hurt perf.)  See also bug 920318.
   1.266 +    Token()
   1.267 +      : type(TOK_ERROR),
   1.268 +        pos(0, 0)
   1.269 +    {
   1.270 +    }
   1.271 +
   1.272 +    // Mutators
   1.273 +
   1.274 +    void setName(PropertyName *name) {
   1.275 +        JS_ASSERT(type == TOK_NAME);
   1.276 +        JS_ASSERT(!IsPoisonedPtr(name));
   1.277 +        u.name = name;
   1.278 +    }
   1.279 +
   1.280 +    void setAtom(JSAtom *atom) {
   1.281 +        JS_ASSERT(type == TOK_STRING);
   1.282 +        JS_ASSERT(!IsPoisonedPtr(atom));
   1.283 +        u.atom = atom;
   1.284 +    }
   1.285 +
   1.286 +    void setRegExpFlags(js::RegExpFlag flags) {
   1.287 +        JS_ASSERT(type == TOK_REGEXP);
   1.288 +        JS_ASSERT((flags & AllFlags) == flags);
   1.289 +        u.reflags = flags;
   1.290 +    }
   1.291 +
   1.292 +    void setNumber(double n, DecimalPoint decimalPoint) {
   1.293 +        JS_ASSERT(type == TOK_NUMBER);
   1.294 +        u.number.value = n;
   1.295 +        u.number.decimalPoint = decimalPoint;
   1.296 +    }
   1.297 +
   1.298 +    // Type-safe accessors
   1.299 +
   1.300 +    PropertyName *name() const {
   1.301 +        JS_ASSERT(type == TOK_NAME);
   1.302 +        return u.name->asPropertyName(); // poor-man's type verification
   1.303 +    }
   1.304 +
   1.305 +    JSAtom *atom() const {
   1.306 +        JS_ASSERT(type == TOK_STRING);
   1.307 +        return u.atom;
   1.308 +    }
   1.309 +
   1.310 +    js::RegExpFlag regExpFlags() const {
   1.311 +        JS_ASSERT(type == TOK_REGEXP);
   1.312 +        JS_ASSERT((u.reflags & AllFlags) == u.reflags);
   1.313 +        return u.reflags;
   1.314 +    }
   1.315 +
   1.316 +    double number() const {
   1.317 +        JS_ASSERT(type == TOK_NUMBER);
   1.318 +        return u.number.value;
   1.319 +    }
   1.320 +
   1.321 +    DecimalPoint decimalPoint() const {
   1.322 +        JS_ASSERT(type == TOK_NUMBER);
   1.323 +        return u.number.decimalPoint;
   1.324 +    }
   1.325 +};
   1.326 +
   1.327 +struct CompileError {
   1.328 +    JSErrorReport report;
   1.329 +    char *message;
   1.330 +    ErrorArgumentsType argumentsType;
   1.331 +    CompileError()
   1.332 +      : message(nullptr), argumentsType(ArgumentsAreUnicode)
   1.333 +    {
   1.334 +        mozilla::PodZero(&report);
   1.335 +    }
   1.336 +    ~CompileError();
   1.337 +    void throwError(JSContext *cx);
   1.338 +
   1.339 +  private:
   1.340 +    // CompileError owns raw allocated memory, so disable assignment and copying
   1.341 +    // for safety.
   1.342 +    void operator=(const CompileError &) MOZ_DELETE;
   1.343 +    CompileError(const CompileError &) MOZ_DELETE;
   1.344 +};
   1.345 +
   1.346 +// Ideally, tokenizing would be entirely independent of context.  But the
   1.347 +// strict mode flag, which is in SharedContext, affects tokenizing, and
   1.348 +// TokenStream needs to see it.
   1.349 +//
   1.350 +// This class is a tiny back-channel from TokenStream to the strict mode flag
   1.351 +// that avoids exposing the rest of SharedContext to TokenStream.
   1.352 +//
   1.353 +class StrictModeGetter {
   1.354 +  public:
   1.355 +    virtual bool strictMode() = 0;
   1.356 +};
   1.357 +
    1.358 +// TokenStream is the lexical scanner for JavaScript source text.
   1.359 +//
   1.360 +// It takes a buffer of jschars and linearly scans it into |Token|s.
   1.361 +// Internally the class uses a four element circular buffer |tokens| of
   1.362 +// |Token|s. As an index for |tokens|, the member |cursor| points to the
   1.363 +// current token.
   1.364 +// Calls to getToken() increase |cursor| by one and return the new current
   1.365 +// token. If a TokenStream was just created, the current token is initialized
   1.366 +// with random data (i.e. not initialized). It is therefore important that
   1.367 +// one of the first four member functions listed below is called first.
   1.368 +// The circular buffer lets us go back up to two tokens from the last
   1.369 +// scanned token. Internally, the relative number of backward steps that were
   1.370 +// taken (via ungetToken()) after the last token was scanned is stored in
   1.371 +// |lookahead|.
   1.372 +//
   1.373 +// The following table lists in which situations it is safe to call each listed
   1.374 +// function. No checks are made by the functions in non-debug builds.
   1.375 +//
   1.376 +// Function Name     | Precondition; changes to |lookahead|
   1.377 +// ------------------+---------------------------------------------------------
   1.378 +// getToken          | none; if |lookahead > 0| then |lookahead--|
   1.379 +// peekToken         | none; if |lookahead == 0| then |lookahead == 1|
   1.380 +// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
   1.381 +// matchToken        | none; if |lookahead > 0| and the match succeeds then
   1.382 +//                   |       |lookahead--|
   1.383 +// consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
   1.384 +// ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
   1.385 +//
   1.386 +// The behavior of the token scanning process (see getTokenInternal()) can be
   1.387 +// modified by calling one of the first four above listed member functions with
   1.388 +// an optional argument of type Modifier.  However, the modifier will be
   1.389 +// ignored unless |lookahead == 0| holds.  Due to constraints of the grammar,
   1.390 +// this turns out not to be a problem in practice. See the
   1.391 +// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
   1.392 +// for more details:
    1.393 +// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E
   1.394 +//
    1.395 +// The methods seek() and tell() allow rescanning from a previously
    1.396 +// visited location of the buffer.
   1.397 +//
   1.398 +class MOZ_STACK_CLASS TokenStream
   1.399 +{
   1.400 +    // Unicode separators that are treated as line terminators, in addition to \n, \r.
   1.401 +    enum {
   1.402 +        LINE_SEPARATOR = 0x2028,
   1.403 +        PARA_SEPARATOR = 0x2029
   1.404 +    };
   1.405 +
   1.406 +    static const size_t ntokens = 4;                // 1 current + 2 lookahead, rounded
   1.407 +                                                    // to power of 2 to avoid divmod by 3
   1.408 +    static const unsigned maxLookahead = 2;
   1.409 +    static const unsigned ntokensMask = ntokens - 1;
   1.410 +
   1.411 +  public:
   1.412 +    typedef Vector<jschar, 32> CharBuffer;
   1.413 +
   1.414 +    TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
   1.415 +                const jschar *base, size_t length, StrictModeGetter *smg);
   1.416 +
   1.417 +    ~TokenStream();
   1.418 +
   1.419 +    // Accessors.
   1.420 +    const Token &currentToken() const { return tokens[cursor]; }
   1.421 +    bool isCurrentTokenType(TokenKind type) const {
   1.422 +        return currentToken().type == type;
   1.423 +    }
   1.424 +    const CharBuffer &getTokenbuf() const { return tokenbuf; }
   1.425 +    const char *getFilename() const { return filename; }
   1.426 +    unsigned getLineno() const { return lineno; }
   1.427 +    unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }
   1.428 +    JSPrincipals *getOriginPrincipals() const { return originPrincipals; }
   1.429 +    JSVersion versionNumber() const { return VersionNumber(options().version); }
   1.430 +    JSVersion versionWithFlags() const { return options().version; }
   1.431 +
   1.432 +    PropertyName *currentName() const {
   1.433 +        if (isCurrentTokenType(TOK_YIELD))
   1.434 +            return cx->names().yield;
   1.435 +        JS_ASSERT(isCurrentTokenType(TOK_NAME));
   1.436 +        return currentToken().name();
   1.437 +    }
   1.438 +
   1.439 +    bool isCurrentTokenAssignment() const {
   1.440 +        return TokenKindIsAssignment(currentToken().type);
   1.441 +    }
   1.442 +
   1.443 +    // Flag methods.
   1.444 +    bool isEOF() const { return flags.isEOF; }
   1.445 +    bool sawOctalEscape() const { return flags.sawOctalEscape; }
   1.446 +    bool hadError() const { return flags.hadError; }
   1.447 +
   1.448 +    // TokenStream-specific error reporters.
   1.449 +    bool reportError(unsigned errorNumber, ...);
   1.450 +    bool reportWarning(unsigned errorNumber, ...);
   1.451 +
   1.452 +    static const uint32_t NoOffset = UINT32_MAX;
   1.453 +
   1.454 +    // General-purpose error reporters.  You should avoid calling these
   1.455 +    // directly, and instead use the more succinct alternatives (e.g.
   1.456 +    // reportError()) in TokenStream, Parser, and BytecodeEmitter.
   1.457 +    bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
   1.458 +                                    va_list args);
   1.459 +    bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
   1.460 +                                       va_list args);
   1.461 +    bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,
   1.462 +                                          va_list args);
   1.463 +
   1.464 +    // asm.js reporter
   1.465 +    void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
   1.466 +
   1.467 +  private:
   1.468 +    // These are private because they should only be called by the tokenizer
   1.469 +    // while tokenizing not by, for example, BytecodeEmitter.
   1.470 +    bool reportStrictModeError(unsigned errorNumber, ...);
   1.471 +    bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }
   1.472 +
   1.473 +    void onError();
   1.474 +    static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb);
   1.475 +    bool putIdentInTokenbuf(const jschar *identStart);
   1.476 +
   1.477 +    struct Flags
   1.478 +    {
   1.479 +        bool isEOF:1;           // Hit end of file.
   1.480 +        bool isDirtyLine:1;     // Non-whitespace since start of line.
   1.481 +        bool sawOctalEscape:1;  // Saw an octal character escape.
   1.482 +        bool hadError:1;        // Returned TOK_ERROR from getToken.
   1.483 +
   1.484 +        Flags()
   1.485 +          : isEOF(), isDirtyLine(), sawOctalEscape(), hadError()
   1.486 +        {}
   1.487 +    };
   1.488 +
   1.489 +  public:
   1.490 +    // Sometimes the parser needs to modify how tokens are created.
   1.491 +    enum Modifier
   1.492 +    {
   1.493 +        None,           // Normal operation.
   1.494 +        Operand,        // Looking for an operand, not an operator.  In
   1.495 +                        //   practice, this means that when '/' is seen,
   1.496 +                        //   we look for a regexp instead of just returning
   1.497 +                        //   TOK_DIV.
   1.498 +        KeywordIsName,  // Treat keywords as names by returning TOK_NAME.
   1.499 +    };
   1.500 +
   1.501 +    // Get the next token from the stream, make it the current token, and
   1.502 +    // return its kind.
   1.503 +    TokenKind getToken(Modifier modifier = None) {
   1.504 +        // Check for a pushed-back token resulting from mismatching lookahead.
   1.505 +        if (lookahead != 0) {
   1.506 +            lookahead--;
   1.507 +            cursor = (cursor + 1) & ntokensMask;
   1.508 +            TokenKind tt = currentToken().type;
   1.509 +            JS_ASSERT(tt != TOK_EOL);
   1.510 +            return tt;
   1.511 +        }
   1.512 +
   1.513 +        return getTokenInternal(modifier);
   1.514 +    }
   1.515 +
   1.516 +    // Push the last scanned token back into the stream.
   1.517 +    void ungetToken() {
   1.518 +        JS_ASSERT(lookahead < maxLookahead);
   1.519 +        lookahead++;
   1.520 +        cursor = (cursor - 1) & ntokensMask;
   1.521 +    }
   1.522 +
   1.523 +    TokenKind peekToken(Modifier modifier = None) {
   1.524 +        if (lookahead != 0)
   1.525 +            return tokens[(cursor + 1) & ntokensMask].type;
   1.526 +        TokenKind tt = getTokenInternal(modifier);
   1.527 +        ungetToken();
   1.528 +        return tt;
   1.529 +    }
   1.530 +
   1.531 +    TokenPos peekTokenPos(Modifier modifier = None) {
   1.532 +        if (lookahead != 0)
   1.533 +            return tokens[(cursor + 1) & ntokensMask].pos;
   1.534 +        getTokenInternal(modifier);
   1.535 +        ungetToken();
   1.536 +        JS_ASSERT(lookahead != 0);
   1.537 +        return tokens[(cursor + 1) & ntokensMask].pos;
   1.538 +    }
   1.539 +
   1.540 +    // This is like peekToken(), with one exception:  if there is an EOL
   1.541 +    // between the end of the current token and the start of the next token, it
   1.542 +    // returns TOK_EOL.  In that case, no token with TOK_EOL is actually
   1.543 +    // created, just a TOK_EOL TokenKind is returned, and currentToken()
   1.544 +    // shouldn't be consulted.  (This is the only place TOK_EOL is produced.)
   1.545 +    MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) {
   1.546 +       const Token &curr = currentToken();
   1.547 +
   1.548 +        // If lookahead != 0, we have scanned ahead at least one token, and
   1.549 +        // |lineno| is the line that the furthest-scanned token ends on.  If
   1.550 +        // it's the same as the line that the current token ends on, that's a
   1.551 +        // stronger condition than what we are looking for, and we don't need
   1.552 +        // to return TOK_EOL.
   1.553 +        if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno))
   1.554 +            return tokens[(cursor + 1) & ntokensMask].type;
   1.555 +
   1.556 +        // The above check misses two cases where we don't have to return
   1.557 +        // TOK_EOL.
   1.558 +        // - The next token starts on the same line, but is a multi-line token.
   1.559 +        // - The next token starts on the same line, but lookahead==2 and there
   1.560 +        //   is a newline between the next token and the one after that.
   1.561 +        // The following test is somewhat expensive but gets these cases (and
   1.562 +        // all others) right.
   1.563 +        (void)getToken(modifier);
   1.564 +        const Token &next = currentToken();
   1.565 +        ungetToken();
   1.566 +        return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)
   1.567 +               ? next.type
   1.568 +               : TOK_EOL;
   1.569 +    }
   1.570 +
   1.571 +    // Get the next token from the stream if its kind is |tt|.
   1.572 +    bool matchToken(TokenKind tt, Modifier modifier = None) {
   1.573 +        if (getToken(modifier) == tt)
   1.574 +            return true;
   1.575 +        ungetToken();
   1.576 +        return false;
   1.577 +    }
   1.578 +
   1.579 +    void consumeKnownToken(TokenKind tt) {
   1.580 +        JS_ALWAYS_TRUE(matchToken(tt));
   1.581 +    }
   1.582 +
   1.583 +    bool matchContextualKeyword(Handle<PropertyName*> keyword) {
   1.584 +        if (getToken() == TOK_NAME && currentToken().name() == keyword)
   1.585 +            return true;
   1.586 +        ungetToken();
   1.587 +        return false;
   1.588 +    }
   1.589 +
   1.590 +    bool nextTokenEndsExpr() {
   1.591 +        return isExprEnding[peekToken()];
   1.592 +    }
   1.593 +
   1.594 +    class MOZ_STACK_CLASS Position {
   1.595 +      public:
   1.596 +        // The Token fields may contain pointers to atoms, so for correct
   1.597 +        // rooting we must ensure collection of atoms is disabled while objects
   1.598 +        // of this class are live.  Do this by requiring a dummy AutoKeepAtoms
   1.599 +        // reference in the constructor.
   1.600 +        //
    1.601 +        // This class is explicitly ignored by the analysis, so don't add any
   1.602 +        // more pointers to GC things here!
   1.603 +        Position(AutoKeepAtoms&) { }
   1.604 +      private:
   1.605 +        Position(const Position&) MOZ_DELETE;
   1.606 +        friend class TokenStream;
   1.607 +        const jschar *buf;
   1.608 +        Flags flags;
   1.609 +        unsigned lineno;
   1.610 +        const jschar *linebase;
   1.611 +        const jschar *prevLinebase;
   1.612 +        Token currentToken;
   1.613 +        unsigned lookahead;
   1.614 +        Token lookaheadTokens[maxLookahead];
   1.615 +    };
   1.616 +
   1.617 +    void advance(size_t position);
   1.618 +    void tell(Position *);
   1.619 +    void seek(const Position &pos);
   1.620 +    bool seek(const Position &pos, const TokenStream &other);
   1.621 +
   1.622 +    size_t positionToOffset(const Position &pos) const {
   1.623 +        return pos.buf - userbuf.base();
   1.624 +    }
   1.625 +
   1.626 +    const jschar *rawBase() const {
   1.627 +        return userbuf.base();
   1.628 +    }
   1.629 +
   1.630 +    const jschar *rawLimit() const {
   1.631 +        return userbuf.limit();
   1.632 +    }
   1.633 +
   1.634 +    bool hasDisplayURL() const {
   1.635 +        return displayURL_ != nullptr;
   1.636 +    }
   1.637 +
   1.638 +    jschar *displayURL() {
   1.639 +        return displayURL_;
   1.640 +    }
   1.641 +
   1.642 +    bool hasSourceMapURL() const {
   1.643 +        return sourceMapURL_ != nullptr;
   1.644 +    }
   1.645 +
   1.646 +    jschar *sourceMapURL() {
   1.647 +        return sourceMapURL_;
   1.648 +    }
   1.649 +
   1.650 +    // If the name at s[0:length] is not a keyword in this version, return
   1.651 +    // true with *ttp unchanged.
   1.652 +    //
   1.653 +    // If it is a reserved word in this version and strictness mode, and thus
   1.654 +    // can't be present in correct code, report a SyntaxError and return false.
   1.655 +    //
   1.656 +    // If it is a keyword, like "if", the behavior depends on ttp. If ttp is
   1.657 +    // null, report a SyntaxError ("if is a reserved identifier") and return
   1.658 +    // false. If ttp is non-null, return true with the keyword's TokenKind in
   1.659 +    // *ttp.
    // Test whether the |length| jschars at |s| spell a keyword, reporting the
    // keyword's token kind through |ttp|.  Defined out of line —
    // NOTE(review): the exact success/failure contract (e.g. strict-mode
    // reserved words) lives in the definition; confirm there.
    bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp);

    // This class maps a userbuf offset (which is 0-indexed) to a line number
    // (which is 1-indexed) and a column index (which is 0-indexed).
    class SourceCoords
    {
        // For a given buffer holding source code, |lineStartOffsets_| has one
        // element per line of source code, plus one sentinel element.  Each
        // non-sentinel element holds the buffer offset for the start of the
        // corresponding line of source code.  For this example script:
        //
        // 1  // xyz            [line starts at offset 0]
        // 2  var x;            [line starts at offset 7]
        // 3                    [line starts at offset 14]
        // 4  var y;            [line starts at offset 15]
        //
        // |lineStartOffsets_| is:
        //
        //   [0, 7, 14, 15, MAX_PTR]
        //
        // To convert a "line number" to a "line index" (i.e. an index into
        // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
        // line index is (3 - initialLineNum_), which is 2.  Therefore
        // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
        // which is 14.  (Note that |initialLineNum_| is often 1, but not
        // always.)
        //
        // The first element is always 0, and the last element is always the
        // MAX_PTR sentinel.
        //
        // offset-to-line/column lookups are O(log n) in the worst case (binary
        // search), but in practice they're heavily clustered and we do better
        // than that by using the previous lookup's result (lastLineIndex_) as
        // a starting point.
        //
        // Checking if an offset lies within a particular line number
        // (isOnThisLine()) is O(1).
        //
        Vector<uint32_t, 128> lineStartOffsets_;
        uint32_t            initialLineNum_;    // line number of the first line (often 1)

        // This is mutable because it's modified on every search, but that fact
        // isn't visible outside this class.
        mutable uint32_t    lastLineIndex_;

        // Return the index into |lineStartOffsets_| of the line containing
        // |offset|.  Defined out of line; per the comment above, the search
        // starts from |lastLineIndex_|.
        uint32_t lineIndexOf(uint32_t offset) const;

        // Sentinel value stored as the final element of |lineStartOffsets_|.
        static const uint32_t MAX_PTR = UINT32_MAX;

        // Conversions between 1-indexed line numbers and 0-indexed positions
        // in |lineStartOffsets_| (see the worked example above).
        uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
        uint32_t lineNumToIndex(uint32_t lineNum)   const { return lineNum   - initialLineNum_; }

      public:
        // |ln| is the initial line number for the buffer — NOTE(review):
        // presumably stored as |initialLineNum_|; confirm in the out-of-line
        // definition.
        SourceCoords(ExclusiveContext *cx, uint32_t ln);

        // Record that line |lineNum| begins at buffer offset
        // |lineStartOffset|.  (Defined out of line.)
        void add(uint32_t lineNum, uint32_t lineStartOffset);

        // Adopt the line-start data of |other| — NOTE(review): presumably
        // returns false on OOM; confirm in the definition.
        bool fill(const SourceCoords &other);

        // O(1) check that |offset| lies within the line numbered |lineNum|.
        bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {
            uint32_t lineIndex = lineNumToIndex(lineNum);
            JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length());  // +1 due to sentinel
            return lineStartOffsets_[lineIndex] <= offset &&
                   offset < lineStartOffsets_[lineIndex + 1];
        }

        // Map a buffer offset to a 1-indexed line number and/or 0-indexed
        // column index.  (All defined out of line.)
        uint32_t lineNum(uint32_t offset) const;
        uint32_t columnIndex(uint32_t offset) const;
        void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;
    };
   1.729 +
    SourceCoords srcCoords;     // offset <-> line/column mapping for this stream

    // The context's well-known atoms.
    JSAtomState &names() const {
        return cx->names();
    }

    // The context this token stream was created with.
    ExclusiveContext *context() const {
        return cx;
    }

    // The compile options this token stream was created with.
    const ReadOnlyCompileOptions &options() const {
        return options_;
    }

  private:
    // This is the low-level interface to the JS source code buffer.  It just
    // gets raw chars, basically.  TokenStreams functions are layered on top
    // and do some extra stuff like converting all EOL sequences to '\n',
    // tracking the line number, and setting |flags.isEOF|.  (The "raw" in "raw
    // chars" refers to the lack of EOL sequence normalization.)
   1.750 +    class TokenBuf {
   1.751 +      public:
   1.752 +        TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length)
   1.753 +          : base_(buf), limit_(buf + length), ptr(buf)
   1.754 +        { }
   1.755 +
   1.756 +        bool hasRawChars() const {
   1.757 +            return ptr < limit_;
   1.758 +        }
   1.759 +
   1.760 +        bool atStart() const {
   1.761 +            return ptr == base_;
   1.762 +        }
   1.763 +
   1.764 +        const jschar *base() const {
   1.765 +            return base_;
   1.766 +        }
   1.767 +
   1.768 +        const jschar *limit() const {
   1.769 +            return limit_;
   1.770 +        }
   1.771 +
   1.772 +        jschar getRawChar() {
   1.773 +            return *ptr++;      // this will nullptr-crash if poisoned
   1.774 +        }
   1.775 +
   1.776 +        jschar peekRawChar() const {
   1.777 +            return *ptr;        // this will nullptr-crash if poisoned
   1.778 +        }
   1.779 +
   1.780 +        bool matchRawChar(jschar c) {
   1.781 +            if (*ptr == c) {    // this will nullptr-crash if poisoned
   1.782 +                ptr++;
   1.783 +                return true;
   1.784 +            }
   1.785 +            return false;
   1.786 +        }
   1.787 +
   1.788 +        bool matchRawCharBackwards(jschar c) {
   1.789 +            JS_ASSERT(ptr);     // make sure it hasn't been poisoned
   1.790 +            if (*(ptr - 1) == c) {
   1.791 +                ptr--;
   1.792 +                return true;
   1.793 +            }
   1.794 +            return false;
   1.795 +        }
   1.796 +
   1.797 +        void ungetRawChar() {
   1.798 +            JS_ASSERT(ptr);     // make sure it hasn't been poisoned
   1.799 +            ptr--;
   1.800 +        }
   1.801 +
   1.802 +        const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {
   1.803 +            JS_ASSERT_IF(!allowPoisoned, ptr);     // make sure it hasn't been poisoned
   1.804 +            return ptr;
   1.805 +        }
   1.806 +
   1.807 +        // Use this with caution!
   1.808 +        void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {
   1.809 +            JS_ASSERT_IF(!allowPoisoned, a);
   1.810 +            ptr = a;
   1.811 +        }
   1.812 +
   1.813 +#ifdef DEBUG
   1.814 +        // Poison the TokenBuf so it cannot be accessed again.
   1.815 +        void poison() {
   1.816 +            ptr = nullptr;
   1.817 +        }
   1.818 +#endif
   1.819 +
   1.820 +        static bool isRawEOLChar(int32_t c) {
   1.821 +            return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;
   1.822 +        }
   1.823 +
   1.824 +        // Finds the next EOL, but stops once 'max' jschars have been scanned
   1.825 +        // (*including* the starting jschar).
   1.826 +        const jschar *findEOLMax(const jschar *p, size_t max);
   1.827 +
   1.828 +      private:
   1.829 +        const jschar *base_;            // base of buffer
   1.830 +        const jschar *limit_;           // limit for quick bounds check
   1.831 +        const jschar *ptr;              // next char to get
   1.832 +    };
   1.833 +
    // The core scanner; defined out of line.  NOTE(review): the meaning of
    // |modifier| lives with the Modifier declaration/definition — confirm
    // there.
    TokenKind getTokenInternal(Modifier modifier);

    // Char-level helpers layered on TokenBuf (see the TokenBuf comment
    // above for the raw/normalized distinction); all defined out of line.
    int32_t getChar();
    int32_t getCharIgnoreEOL();
    void ungetChar(int32_t c);
    void ungetCharIgnoreEOL(int32_t c);
    Token *newToken(ptrdiff_t adjust);
    bool peekUnicodeEscape(int32_t *c);
    bool matchUnicodeEscapeIdStart(int32_t *c);
    bool matchUnicodeEscapeIdent(int32_t *c);
    bool peekChars(int n, jschar *cp);

    // Comment-embedded directive scanning (display URL / source mapping
    // URL — see getDisplayURL/getSourceMappingURL below).  NOTE(review):
    // exact pragma spellings live in the definitions; confirm there.
    bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
    bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
                      const char *directive, int directiveLength,
                      const char *errorMsgPragma, jschar **destination);
    bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
    bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);
   1.852 +
   1.853 +    // |expect| cannot be an EOL char.
   1.854 +    bool matchChar(int32_t expect) {
   1.855 +        MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
   1.856 +        return MOZ_LIKELY(userbuf.hasRawChars()) &&
   1.857 +               userbuf.matchRawChar(expect);
   1.858 +    }
   1.859 +
   1.860 +    void consumeKnownChar(int32_t expect) {
   1.861 +        mozilla::DebugOnly<int32_t> c = getChar();
   1.862 +        JS_ASSERT(c == expect);
   1.863 +    }
   1.864 +
   1.865 +    int32_t peekChar() {
   1.866 +        int32_t c = getChar();
   1.867 +        ungetChar(c);
   1.868 +        return c;
   1.869 +    }
   1.870 +
   1.871 +    void skipChars(int n) {
   1.872 +        while (--n >= 0)
   1.873 +            getChar();
   1.874 +    }
   1.875 +
    // Line/flag bookkeeping performed when an EOL sequence is consumed;
    // both defined out of line.
    void updateLineInfoForEOL();
    void updateFlagsForEOL();

    // Options used for parsing/tokenizing.
    const ReadOnlyCompileOptions &options_;

    Token               tokens[ntokens];    // circular token buffer
    unsigned            cursor;             // index of last parsed token
    unsigned            lookahead;          // count of lookahead tokens
    unsigned            lineno;             // current line number
    Flags               flags;              // flags -- see above
    const jschar        *linebase;          // start of current line;  points into userbuf
    const jschar        *prevLinebase;      // start of previous line;  nullptr if on the first line
    TokenBuf            userbuf;            // user input buffer
    const char          *filename;          // input filename or null
    jschar              *displayURL_;       // the user's requested source URL or null
    jschar              *sourceMapURL_;     // source map's filename or null
    CharBuffer          tokenbuf;           // current token string buffer
    bool                maybeEOL[256];      // probabilistic EOL lookup table
    bool                maybeStrSpecial[256];   // speeds up string scanning
    uint8_t             isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
    ExclusiveContext    *const cx;          // context this stream was created with (see context())
    JSPrincipals        *const originPrincipals;
    StrictModeGetter    *strictModeGetter;  // used to test for strict mode
};
   1.901 +
// Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
// message have const jschar* type, not const char*.
#define JSREPORT_UC 0x100

} // namespace frontend
} // namespace js

// fgets()-style line reader — NOTE(review): defined elsewhere; confirm the
// return-value convention in the definition.
extern JS_FRIEND_API(int)
js_fgets(char *buf, int size, FILE *file);

#ifdef DEBUG
// Debug-only: name of the given token kind, for diagnostic output.
extern const char *
TokenKindToString(js::frontend::TokenKind tt);
#endif

#endif /* frontend_TokenStream_h */

mercurial