/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef frontend_TokenStream_h
#define frontend_TokenStream_h

// JS lexical scanner interface.

#include "mozilla/DebugOnly.h"
#include "mozilla/PodOperations.h"

#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>

#include "jscntxt.h"
#include "jspubtd.h"

#include "js/Vector.h"
#include "vm/RegExpObject.h"

namespace js {
namespace frontend {

// Values of this type are used to index into arrays such as isExprEnding[],
// so the first value must be zero.
enum TokenKind {
    TOK_ERROR = 0,                 // well-known as the only code < EOF
    TOK_EOF,                       // end of file
    TOK_EOL,                       // end of line; only returned by peekTokenSameLine()
    TOK_SEMI,                      // semicolon
    TOK_COMMA,                     // comma operator
    TOK_HOOK, TOK_COLON,           // conditional (?:)
    TOK_INC, TOK_DEC,              // increment/decrement (++ --)
    TOK_DOT,                       // member operator (.)
    TOK_TRIPLEDOT,                 // for rest arguments (...)
    TOK_LB, TOK_RB,                // left and right brackets
    TOK_LC, TOK_RC,                // left and right curlies (braces)
    TOK_LP, TOK_RP,                // left and right parentheses
    TOK_NAME,                      // identifier
    TOK_NUMBER,                    // numeric constant
    TOK_STRING,                    // string constant
    TOK_REGEXP,                    // RegExp constant
    TOK_TRUE,                      // true
    TOK_FALSE,                     // false
    TOK_NULL,                      // null
    TOK_THIS,                      // this
    TOK_FUNCTION,                  // function keyword
    TOK_IF,                        // if keyword
    TOK_ELSE,                      // else keyword
    TOK_SWITCH,                    // switch keyword
    TOK_CASE,                      // case keyword
    TOK_DEFAULT,                   // default keyword
    TOK_WHILE,                     // while keyword
    TOK_DO,                        // do keyword
    TOK_FOR,                       // for keyword
    TOK_BREAK,                     // break keyword
    TOK_CONTINUE,                  // continue keyword
    TOK_VAR,                       // var keyword
    TOK_CONST,                     // const keyword
    TOK_WITH,                      // with keyword
    TOK_RETURN,                    // return keyword
    TOK_NEW,                       // new keyword
    TOK_DELETE,                    // delete keyword
    TOK_TRY,                       // try keyword
    TOK_CATCH,                     // catch keyword
    TOK_FINALLY,                   // finally keyword
    TOK_THROW,                     // throw keyword
    TOK_DEBUGGER,                  // debugger keyword
    TOK_YIELD,                     // yield from generator function
    TOK_LET,                       // let keyword
    TOK_EXPORT,                    // export keyword
    TOK_IMPORT,                    // import keyword
    TOK_RESERVED,                  // reserved keywords
    TOK_STRICT_RESERVED,           // reserved keywords in strict mode

    // The following token types occupy contiguous ranges to enable easy
    // range-testing.

    // Binary operator tokens, TOK_OR thru TOK_MOD.  These must be in the same
    // order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h.
    TOK_OR,                        // logical or (||)
    TOK_BINOP_FIRST = TOK_OR,
    TOK_AND,                       // logical and (&&)
    TOK_BITOR,                     // bitwise-or (|)
    TOK_BITXOR,                    // bitwise-xor (^)
    TOK_BITAND,                    // bitwise-and (&)

    // Equality operation tokens, per TokenKindIsEquality.
    TOK_STRICTEQ,
    TOK_EQUALITY_START = TOK_STRICTEQ,
    TOK_EQ,
    TOK_STRICTNE,
    TOK_NE,
    TOK_EQUALITY_LAST = TOK_NE,

    // Relational ops (< <= > >=), per TokenKindIsRelational.
    TOK_LT,
    TOK_RELOP_START = TOK_LT,
    TOK_LE,
    TOK_GT,
    TOK_GE,
    TOK_RELOP_LAST = TOK_GE,

    TOK_INSTANCEOF,                // |instanceof| keyword
    TOK_IN,                        // |in| keyword

    // Shift ops (<< >> >>>), per TokenKindIsShift.
    TOK_LSH,
    TOK_SHIFTOP_START = TOK_LSH,
    TOK_RSH,
    TOK_URSH,
    TOK_SHIFTOP_LAST = TOK_URSH,

    TOK_ADD,
    TOK_SUB,
    TOK_MUL,
    TOK_DIV,
    TOK_MOD,
    TOK_BINOP_LAST = TOK_MOD,

    // Unary operation tokens.
    TOK_TYPEOF,
    TOK_VOID,
    TOK_NOT,
    TOK_BITNOT,

    TOK_ARROW,                     // function arrow (=>)

    // Assignment ops (= += -= etc.), per TokenKindIsAssignment.
    TOK_ASSIGN,
    TOK_ASSIGNMENT_START = TOK_ASSIGN,
    TOK_ADDASSIGN,
    TOK_SUBASSIGN,
    TOK_BITORASSIGN,
    TOK_BITXORASSIGN,
    TOK_BITANDASSIGN,
    TOK_LSHASSIGN,
    TOK_RSHASSIGN,
    TOK_URSHASSIGN,
    TOK_MULASSIGN,
    TOK_DIVASSIGN,
    TOK_MODASSIGN,
    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

    TOK_LIMIT                      // domain size
};

inline bool
TokenKindIsBinaryOp(TokenKind tt)
{
    return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST;
}

inline bool
TokenKindIsEquality(TokenKind tt)
{
    return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST;
}

inline bool
TokenKindIsRelational(TokenKind tt)
{
    return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST;
}

inline bool
TokenKindIsShift(TokenKind tt)
{
    return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST;
}

inline bool
TokenKindIsAssignment(TokenKind tt)
{
    return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST;
}

inline bool
TokenKindIsDecl(TokenKind tt)
{
    return tt == TOK_VAR || tt == TOK_LET;
}
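
// Illustrative sketch (not engine code): because the operator tokens above are
// laid out in contiguous ranges, classifying a token is a pair of integer
// comparisons rather than a switch over every TOK_* value.  A hypothetical
// caller might write:
//
//   void classify(TokenKind tt) {       // classify() exists only for this example
//       if (TokenKindIsAssignment(tt)) {
//           // tt is one of TOK_ASSIGN .. TOK_MODASSIGN
//       } else if (TokenKindIsRelational(tt)) {
//           // tt is one of TOK_LT, TOK_LE, TOK_GT, TOK_GE
//       }
//   }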

struct TokenPos {
    uint32_t    begin;  // Offset of the token's first char.
    uint32_t    end;    // Offset of 1 past the token's last char.

    TokenPos() {}
    TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}

    // Return a TokenPos that covers left, right, and anything in between.
    static TokenPos box(const TokenPos &left, const TokenPos &right) {
        JS_ASSERT(left.begin <= left.end);
        JS_ASSERT(left.end <= right.begin);
        JS_ASSERT(right.begin <= right.end);
        return TokenPos(left.begin, right.end);
    }

    bool operator==(const TokenPos &bpos) const {
        return begin == bpos.begin && end == bpos.end;
    }

    bool operator!=(const TokenPos &bpos) const {
        return begin != bpos.begin || end != bpos.end;
    }

    bool operator <(const TokenPos &bpos) const {
        return begin < bpos.begin;
    }

    bool operator <=(const TokenPos &bpos) const {
        return begin <= bpos.begin;
    }

    bool operator >(const TokenPos &bpos) const {
        return !(*this <= bpos);
    }

    bool operator >=(const TokenPos &bpos) const {
        return !(*this < bpos);
    }

    bool encloses(const TokenPos &pos) const {
        return begin <= pos.begin && pos.end <= end;
    }
};
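
// Illustrative sketch: given a token spanning offsets [4, 7) and a later one
// spanning [10, 15), box() produces the covering position, and encloses() then
// holds for both inputs:
//
//   TokenPos left(4, 7), right(10, 15);
//   TokenPos whole = TokenPos::box(left, right);  // begin == 4, end == 15
//   JS_ASSERT(whole.encloses(left) && whole.encloses(right));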

enum DecimalPoint { NoDecimal = false, HasDecimal = true };

struct Token
{
    TokenKind           type;           // char value or above enumerator
    TokenPos            pos;            // token position in file
    union {
      private:
        friend struct Token;
        PropertyName    *name;          // non-numeric atom
        JSAtom          *atom;          // potentially-numeric atom
        struct {
            double      value;          // floating point number
            DecimalPoint decimalPoint;  // literal contains '.'
        } number;
        RegExpFlag      reflags;        // regexp flags; use tokenbuf to access
                                        //   regexp chars
    } u;

    // This constructor is necessary only for MSVC 2013 and how it compiles the
    // initialization of TokenStream::tokens.  That field is initialized as
    // tokens() in the constructor init-list.  This *should* zero the entire
    // array, then (because Token has a non-trivial constructor, because
    // TokenPos has a user-provided constructor) call the implicit Token
    // constructor on each element, which would call the TokenPos constructor
    // for Token::pos and do nothing.  (All of which is equivalent to just
    // zeroing TokenStream::tokens.)  But MSVC 2013 (2010/2012 don't have this
    // bug) doesn't zero out each element, so we need this extra constructor to
    // make it do the right thing.  (Token is used primarily by reference or
    // pointer, and it's only initialized in a very few places, so having a
    // user-defined constructor won't hurt perf.)  See also bug 920318.
    Token()
      : type(TOK_ERROR),
        pos(0, 0)
    {
    }

    // Mutators

    void setName(PropertyName *name) {
        JS_ASSERT(type == TOK_NAME);
        JS_ASSERT(!IsPoisonedPtr(name));
        u.name = name;
    }

    void setAtom(JSAtom *atom) {
        JS_ASSERT(type == TOK_STRING);
        JS_ASSERT(!IsPoisonedPtr(atom));
        u.atom = atom;
    }

    void setRegExpFlags(js::RegExpFlag flags) {
        JS_ASSERT(type == TOK_REGEXP);
        JS_ASSERT((flags & AllFlags) == flags);
        u.reflags = flags;
    }

    void setNumber(double n, DecimalPoint decimalPoint) {
        JS_ASSERT(type == TOK_NUMBER);
        u.number.value = n;
        u.number.decimalPoint = decimalPoint;
    }

    // Type-safe accessors

    PropertyName *name() const {
        JS_ASSERT(type == TOK_NAME);
        return u.name->asPropertyName();  // poor-man's type verification
    }

    JSAtom *atom() const {
        JS_ASSERT(type == TOK_STRING);
        return u.atom;
    }

    js::RegExpFlag regExpFlags() const {
        JS_ASSERT(type == TOK_REGEXP);
        JS_ASSERT((u.reflags & AllFlags) == u.reflags);
        return u.reflags;
    }

    double number() const {
        JS_ASSERT(type == TOK_NUMBER);
        return u.number.value;
    }

    DecimalPoint decimalPoint() const {
        JS_ASSERT(type == TOK_NUMBER);
        return u.number.decimalPoint;
    }
};
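
// Illustrative sketch: the |type| tag determines which accessor is valid (and
// each accessor asserts it).  A consumer holding a TokenStream |ts| might do:
//
//   const Token &tok = ts.currentToken();
//   if (tok.type == TOK_NUMBER) {
//       double d = tok.number();
//       bool hasDot = tok.decimalPoint() == HasDecimal;
//   } else if (tok.type == TOK_NAME) {
//       PropertyName *name = tok.name();
//   }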

struct CompileError {
    JSErrorReport report;
    char *message;
    ErrorArgumentsType argumentsType;
    CompileError()
      : message(nullptr), argumentsType(ArgumentsAreUnicode)
    {
        mozilla::PodZero(&report);
    }
    ~CompileError();
    void throwError(JSContext *cx);

  private:
    // CompileError owns raw allocated memory, so disable assignment and
    // copying for safety.
    void operator=(const CompileError &) MOZ_DELETE;
    CompileError(const CompileError &) MOZ_DELETE;
};

// Ideally, tokenizing would be entirely independent of context.  But the
// strict mode flag, which is in SharedContext, affects tokenizing, and
// TokenStream needs to see it.
//
// This class is a tiny back-channel from TokenStream to the strict mode flag
// that avoids exposing the rest of SharedContext to TokenStream.
//
class StrictModeGetter {
  public:
    virtual bool strictMode() = 0;
};

// TokenStream is the lexical scanner for JavaScript source text.
//
// It takes a buffer of jschars and linearly scans it into |Token|s.
// Internally the class uses a four-element circular buffer |tokens| of
// |Token|s.  As an index for |tokens|, the member |cursor| points to the
// current token.  Calls to getToken() increase |cursor| by one and return the
// new current token.  If a TokenStream was just created, the current token is
// uninitialized, so one of the first four member functions listed below must
// be called before anything else.  The circular buffer lets us go back up to
// two tokens from the last scanned token.  Internally, the relative number of
// backward steps that were taken (via ungetToken()) after the last token was
// scanned is stored in |lookahead|.
//
// The following table lists the precondition for calling each listed function
// and how the call changes |lookahead|.  The preconditions are checked only
// in debug builds.
//
// Function Name     | Precondition; changes to |lookahead|
// ------------------+---------------------------------------------------------
// getToken          | none; if |lookahead > 0| then |lookahead--|
// peekToken         | none; if |lookahead == 0| then |lookahead == 1|
// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
// matchToken        | none; if |lookahead > 0| and the match succeeds then
//                   |   |lookahead--|
// consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
// ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
//
// The behavior of the token scanning process (see getTokenInternal()) can be
// modified by calling one of the first four member functions listed above with
// an optional argument of type Modifier.  However, the modifier will be
// ignored unless |lookahead == 0| holds.  Due to constraints of the grammar,
// this turns out not to be a problem in practice.  See the
// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
// for more details:
// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E
//
// The methods seek() and tell() make it possible to rescan from a previously
// visited location in the buffer.
//
class MOZ_STACK_CLASS TokenStream
{
    // Unicode separators that are treated as line terminators, in addition to \n, \r.
    enum {
        LINE_SEPARATOR = 0x2028,
        PARA_SEPARATOR = 0x2029
    };

    static const size_t ntokens = 4;    // 1 current + 2 lookahead, rounded
                                        // to power of 2 to avoid divmod by 3
    static const unsigned maxLookahead = 2;
    static const unsigned ntokensMask = ntokens - 1;

  public:
    typedef Vector<jschar, 32> CharBuffer;

    TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
                const jschar *base, size_t length, StrictModeGetter *smg);

    ~TokenStream();
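
    // Illustrative sketch of the lookahead discipline described in the table
    // above; the calls below are examples, not engine code:
    //
    //   TokenKind tt = ts.getToken();   // scan a token; |lookahead| stays 0
    //   ts.ungetToken();                // push it back; |lookahead| == 1
    //   if (ts.matchToken(TOK_LP)) {
    //       // The pushed-back token was TOK_LP and has been re-consumed.
    //   } else {
    //       // The non-matching token was pushed back again (|lookahead| == 1);
    //       // peekToken() now returns it without consuming it.
    //       TokenKind next = ts.peekToken();
    //   }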

    // Accessors.
    const Token &currentToken() const { return tokens[cursor]; }
    bool isCurrentTokenType(TokenKind type) const {
        return currentToken().type == type;
    }
    const CharBuffer &getTokenbuf() const { return tokenbuf; }
    const char *getFilename() const { return filename; }
    unsigned getLineno() const { return lineno; }
    unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }
    JSPrincipals *getOriginPrincipals() const { return originPrincipals; }
    JSVersion versionNumber() const { return VersionNumber(options().version); }
    JSVersion versionWithFlags() const { return options().version; }

    PropertyName *currentName() const {
        if (isCurrentTokenType(TOK_YIELD))
            return cx->names().yield;
        JS_ASSERT(isCurrentTokenType(TOK_NAME));
        return currentToken().name();
    }

    bool isCurrentTokenAssignment() const {
        return TokenKindIsAssignment(currentToken().type);
    }

    // Flag methods.
    bool isEOF() const { return flags.isEOF; }
    bool sawOctalEscape() const { return flags.sawOctalEscape; }
    bool hadError() const { return flags.hadError; }

    // TokenStream-specific error reporters.
    bool reportError(unsigned errorNumber, ...);
    bool reportWarning(unsigned errorNumber, ...);

    static const uint32_t NoOffset = UINT32_MAX;

    // General-purpose error reporters.  You should avoid calling these
    // directly, and instead use the more succinct alternatives (e.g.
    // reportError()) in TokenStream, Parser, and BytecodeEmitter.
    bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
                                    va_list args);
    bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
                                       va_list args);
    bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,
                                          va_list args);

    // asm.js reporter
    void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
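
    // Illustrative sketch: a caller that detects a problem reports it through
    // one of the reporters above and then propagates failure.  The JSMSG_* id
    // below is a placeholder, not necessarily a real message in js.msg:
    //
    //   if (sawBadEscape) {
    //       reportError(JSMSG_BAD_ESCAPE);   // placeholder message id
    //       return false;
    //   }
    //
    // reportWarning() has the same shape, but compilation may continue after it.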

  private:
    // These are private because they should only be called by the tokenizer
    // while tokenizing, not by, for example, BytecodeEmitter.
    bool reportStrictModeError(unsigned errorNumber, ...);
    bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }

    void onError();
    static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb);
    bool putIdentInTokenbuf(const jschar *identStart);

    struct Flags
    {
        bool isEOF:1;           // Hit end of file.
        bool isDirtyLine:1;     // Non-whitespace since start of line.
        bool sawOctalEscape:1;  // Saw an octal character escape.
        bool hadError:1;        // Returned TOK_ERROR from getToken.

        Flags()
          : isEOF(), isDirtyLine(), sawOctalEscape(), hadError()
        {}
    };

  public:
    // Sometimes the parser needs to modify how tokens are created.
    enum Modifier
    {
        None,           // Normal operation.
        Operand,        // Looking for an operand, not an operator.  In
                        // practice, this means that when '/' is seen, we
                        // look for a regexp instead of just returning
                        // TOK_DIV.
        KeywordIsName,  // Treat keywords as names by returning TOK_NAME.
    };

    // Get the next token from the stream, make it the current token, and
    // return its kind.
    TokenKind getToken(Modifier modifier = None) {
        // Check for a pushed-back token resulting from mismatching lookahead.
        if (lookahead != 0) {
            lookahead--;
            cursor = (cursor + 1) & ntokensMask;
            TokenKind tt = currentToken().type;
            JS_ASSERT(tt != TOK_EOL);
            return tt;
        }

        return getTokenInternal(modifier);
    }

    // Push the last scanned token back into the stream.
    void ungetToken() {
        JS_ASSERT(lookahead < maxLookahead);
        lookahead++;
        cursor = (cursor - 1) & ntokensMask;
    }

    TokenKind peekToken(Modifier modifier = None) {
        if (lookahead != 0)
            return tokens[(cursor + 1) & ntokensMask].type;
        TokenKind tt = getTokenInternal(modifier);
        ungetToken();
        return tt;
    }

    TokenPos peekTokenPos(Modifier modifier = None) {
        if (lookahead != 0)
            return tokens[(cursor + 1) & ntokensMask].pos;
        getTokenInternal(modifier);
        ungetToken();
        JS_ASSERT(lookahead != 0);
        return tokens[(cursor + 1) & ntokensMask].pos;
    }

    // This is like peekToken(), with one exception:  if there is an EOL
    // between the end of the current token and the start of the next token, it
    // returns TOK_EOL.  In that case, no token with TOK_EOL is actually
    // created, just a TOK_EOL TokenKind is returned, and currentToken()
    // shouldn't be consulted.  (This is the only place TOK_EOL is produced.)
    MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) {
        const Token &curr = currentToken();

        // If lookahead != 0, we have scanned ahead at least one token, and
        // |lineno| is the line that the furthest-scanned token ends on.  If
        // it's the same as the line that the current token ends on, that's a
        // stronger condition than what we are looking for, and we don't need
        // to return TOK_EOL.
        if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno))
            return tokens[(cursor + 1) & ntokensMask].type;

        // The above check misses two cases where we don't have to return
        // TOK_EOL.
        // - The next token starts on the same line, but is a multi-line token.
        // - The next token starts on the same line, but lookahead==2 and there
        //   is a newline between the next token and the one after that.
        // The following test is somewhat expensive but gets these cases (and
        // all others) right.
        (void)getToken(modifier);
        const Token &next = currentToken();
        ungetToken();
        return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)
               ? next.type
               : TOK_EOL;
    }
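
    // Illustrative sketch: peekTokenSameLine() is what a parser needs for
    // restricted productions such as |return [no LineTerminator here] expr|,
    // where a line break after the keyword changes the parse (ASI).  The exact
    // terminator set below is illustrative; see the parser for the real logic.
    //
    //   // After consuming TOK_RETURN:
    //   TokenKind next = ts.peekTokenSameLine(TokenStream::Operand);
    //   if (next == TOK_EOL || next == TOK_SEMI || next == TOK_RC) {
    //       // |return| with no operand.
    //   } else {
    //       // Parse the return expression.
    //   }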

    // Get the next token from the stream if its kind is |tt|.
    bool matchToken(TokenKind tt, Modifier modifier = None) {
        if (getToken(modifier) == tt)
            return true;
        ungetToken();
        return false;
    }

    void consumeKnownToken(TokenKind tt) {
        JS_ALWAYS_TRUE(matchToken(tt));
    }

    bool matchContextualKeyword(Handle<PropertyName*> keyword) {
        if (getToken() == TOK_NAME && currentToken().name() == keyword)
            return true;
        ungetToken();
        return false;
    }

    bool nextTokenEndsExpr() {
        return isExprEnding[peekToken()];
    }

    class MOZ_STACK_CLASS Position {
      public:
        // The Token fields may contain pointers to atoms, so for correct
        // rooting we must ensure collection of atoms is disabled while objects
        // of this class are live.  Do this by requiring a dummy AutoKeepAtoms
        // reference in the constructor.
        //
        // This class is explicitly ignored by the analysis, so don't add any
        // more pointers to GC things here!
        Position(AutoKeepAtoms&) { }
      private:
        Position(const Position&) MOZ_DELETE;
        friend class TokenStream;
        const jschar *buf;
        Flags flags;
        unsigned lineno;
        const jschar *linebase;
        const jschar *prevLinebase;
        Token currentToken;
        unsigned lookahead;
        Token lookaheadTokens[maxLookahead];
    };

    void advance(size_t position);
    void tell(Position *);
    void seek(const Position &pos);
    bool seek(const Position &pos, const TokenStream &other);

    size_t positionToOffset(const Position &pos) const {
        return pos.buf - userbuf.base();
    }

    const jschar *rawBase() const {
        return userbuf.base();
    }

    const jschar *rawLimit() const {
        return userbuf.limit();
    }

    bool hasDisplayURL() const {
        return displayURL_ != nullptr;
    }

    jschar *displayURL() {
        return displayURL_;
    }

    bool hasSourceMapURL() const {
        return sourceMapURL_ != nullptr;
    }

    jschar *sourceMapURL() {
        return sourceMapURL_;
    }

    // If the name at s[0:length] is not a keyword in this version, return
    // true with *ttp unchanged.
    //
    // If it is a reserved word in this version and strictness mode, and thus
    // can't be present in correct code, report a SyntaxError and return false.
    //
    // If it is a keyword, like "if", the behavior depends on ttp.  If ttp is
    // null, report a SyntaxError ("if is a reserved identifier") and return
    // false.  If ttp is non-null, return true with the keyword's TokenKind in
    // *ttp.
    bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp);
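
    // Illustrative sketch of tell()/seek(): record a Position, scan ahead
    // speculatively, then rewind.  The AutoKeepAtoms argument shown here is an
    // assumption about the caller's context; it only has to keep atoms alive
    // for as long as the Position lives.
    //
    //   {
    //       AutoKeepAtoms keepAtoms(cx->perThreadData);  // illustrative
    //       TokenStream::Position start(keepAtoms);
    //       ts.tell(&start);
    //       // ... speculative scanning ...
    //       ts.seek(start);   // rewind to the recorded position
    //   }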

    // This class maps a userbuf offset (which is 0-indexed) to a line number
    // (which is 1-indexed) and a column index (which is 0-indexed).
    class SourceCoords
    {
        // For a given buffer holding source code, |lineStartOffsets_| has one
        // element per line of source code, plus one sentinel element.  Each
        // non-sentinel element holds the buffer offset for the start of the
        // corresponding line of source code.  For this example script:
        //
        // 1  // xyz            [line starts at offset 0]
        // 2  var x;            [line starts at offset 7]
        // 3                    [line starts at offset 14]
        // 4  var y;            [line starts at offset 15]
        //
        // |lineStartOffsets_| is:
        //
        //   [0, 7, 14, 15, MAX_PTR]
        //
        // To convert a "line number" to a "line index" (i.e. an index into
        // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
        // line index is (3 - initialLineNum_), which is 2.  Therefore
        // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
        // which is 14.  (Note that |initialLineNum_| is often 1, but not
        // always.)
        //
        // The first element is always 0, and the last element is always the
        // MAX_PTR sentinel.
        //
        // offset-to-line/column lookups are O(log n) in the worst case (binary
        // search), but in practice they're heavily clustered and we do better
        // than that by using the previous lookup's result (lastLineIndex_) as
        // a starting point.
        //
        // Checking if an offset lies within a particular line number
        // (isOnThisLine()) is O(1).
        //
        Vector<uint32_t, 128> lineStartOffsets_;
        uint32_t initialLineNum_;

        // This is mutable because it's modified on every search, but that fact
        // isn't visible outside this class.
        mutable uint32_t lastLineIndex_;

        uint32_t lineIndexOf(uint32_t offset) const;

        static const uint32_t MAX_PTR = UINT32_MAX;

        uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
        uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; }

      public:
        SourceCoords(ExclusiveContext *cx, uint32_t ln);

        void add(uint32_t lineNum, uint32_t lineStartOffset);
        bool fill(const SourceCoords &other);

        bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {
            uint32_t lineIndex = lineNumToIndex(lineNum);
            JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length());  // +1 due to sentinel
            return lineStartOffsets_[lineIndex] <= offset &&
                   offset < lineStartOffsets_[lineIndex + 1];
        }

        uint32_t lineNum(uint32_t offset) const;
        uint32_t columnIndex(uint32_t offset) const;
        void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;
    };

    SourceCoords srcCoords;
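
    // Worked example (illustrative), using the script in the SourceCoords
    // comment above (line starts at offsets 0, 7, 14 and 15, with
    // initialLineNum_ == 1):
    //
    //   srcCoords.lineNum(9)           // == 2:  offset 9 is inside "var x;"
    //   srcCoords.columnIndex(9)       // == 2:  9 - 7
    //   srcCoords.isOnThisLine(9, 2)   // true:  7 <= 9 < 14
    //   srcCoords.isOnThisLine(14, 3)  // true:  14 <= 14 < 15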

    JSAtomState &names() const {
        return cx->names();
    }

    ExclusiveContext *context() const {
        return cx;
    }

    const ReadOnlyCompileOptions &options() const {
        return options_;
    }

  private:
    // This is the low-level interface to the JS source code buffer.  It just
    // gets raw chars, basically.  TokenStream's functions are layered on top
    // and do some extra stuff like converting all EOL sequences to '\n',
    // tracking the line number, and setting |flags.isEOF|.  (The "raw" in "raw
    // chars" refers to the lack of EOL sequence normalization.)
    class TokenBuf {
      public:
        TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length)
          : base_(buf), limit_(buf + length), ptr(buf)
        { }

        bool hasRawChars() const {
            return ptr < limit_;
        }

        bool atStart() const {
            return ptr == base_;
        }

        const jschar *base() const {
            return base_;
        }

        const jschar *limit() const {
            return limit_;
        }

        jschar getRawChar() {
            return *ptr++;      // this will nullptr-crash if poisoned
        }

        jschar peekRawChar() const {
            return *ptr;        // this will nullptr-crash if poisoned
        }

        bool matchRawChar(jschar c) {
            if (*ptr == c) {    // this will nullptr-crash if poisoned
                ptr++;
                return true;
            }
            return false;
        }

        bool matchRawCharBackwards(jschar c) {
            JS_ASSERT(ptr);     // make sure it hasn't been poisoned
            if (*(ptr - 1) == c) {
                ptr--;
                return true;
            }
            return false;
        }

        void ungetRawChar() {
            JS_ASSERT(ptr);     // make sure it hasn't been poisoned
            ptr--;
        }

        const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {
            JS_ASSERT_IF(!allowPoisoned, ptr);  // make sure it hasn't been poisoned
            return ptr;
        }

        // Use this with caution!
        void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {
            JS_ASSERT_IF(!allowPoisoned, a);
            ptr = a;
        }

#ifdef DEBUG
        // Poison the TokenBuf so it cannot be accessed again.
        void poison() {
            ptr = nullptr;
        }
#endif

        static bool isRawEOLChar(int32_t c) {
            return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;
        }
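
        // Illustrative sketch (not engine code): TokenBuf is deliberately dumb
        // and just hands out raw jschars; EOL folding and line accounting are
        // layered on top in TokenStream::getChar().
        //
        //   TokenBuf tb(cx, chars, length);
        //   while (tb.hasRawChars()) {
        //       jschar c = tb.getRawChar();
        //       if (TokenBuf::isRawEOLChar(c)) {
        //           // getChar() would normalize \r, \r\n, LINE_SEPARATOR and
        //           // PARA_SEPARATOR to '\n' here and bump the line number.
        //       }
        //   }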

        // Finds the next EOL, but stops once 'max' jschars have been scanned
        // (*including* the starting jschar).
        const jschar *findEOLMax(const jschar *p, size_t max);

      private:
        const jschar *base_;            // base of buffer
        const jschar *limit_;           // limit for quick bounds check
        const jschar *ptr;              // next char to get
    };

    TokenKind getTokenInternal(Modifier modifier);

    int32_t getChar();
    int32_t getCharIgnoreEOL();
    void ungetChar(int32_t c);
    void ungetCharIgnoreEOL(int32_t c);
    Token *newToken(ptrdiff_t adjust);
    bool peekUnicodeEscape(int32_t *c);
    bool matchUnicodeEscapeIdStart(int32_t *c);
    bool matchUnicodeEscapeIdent(int32_t *c);
    bool peekChars(int n, jschar *cp);

    bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
    bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
                      const char *directive, int directiveLength,
                      const char *errorMsgPragma, jschar **destination);
    bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
    bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);

    // |expect| cannot be an EOL char.
    bool matchChar(int32_t expect) {
        MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
        return MOZ_LIKELY(userbuf.hasRawChars()) &&
               userbuf.matchRawChar(expect);
    }

    void consumeKnownChar(int32_t expect) {
        mozilla::DebugOnly<int32_t> c = getChar();
        JS_ASSERT(c == expect);
    }

    int32_t peekChar() {
        int32_t c = getChar();
        ungetChar(c);
        return c;
    }

    void skipChars(int n) {
        while (--n >= 0)
            getChar();
    }

    void updateLineInfoForEOL();
    void updateFlagsForEOL();

    // Options used for parsing/tokenizing.
    const ReadOnlyCompileOptions &options_;

    Token               tokens[ntokens];    // circular token buffer
    unsigned            cursor;             // index of last parsed token
    unsigned            lookahead;          // count of lookahead tokens
    unsigned            lineno;             // current line number
    Flags               flags;              // flags -- see above
    const jschar        *linebase;          // start of current line; points into userbuf
    const jschar        *prevLinebase;      // start of previous line; nullptr if on the first line
    TokenBuf            userbuf;            // user input buffer
    const char          *filename;          // input filename or null
    jschar              *displayURL_;       // the user's requested source URL or null
    jschar              *sourceMapURL_;     // source map's filename or null
    CharBuffer          tokenbuf;           // current token string buffer
    bool                maybeEOL[256];      // probabilistic EOL lookup table
    bool                maybeStrSpecial[256];       // speeds up string scanning
    uint8_t             isExprEnding[TOK_LIMIT];    // which tokens definitely terminate exprs?
    ExclusiveContext    *const cx;
    JSPrincipals        *const originPrincipals;
    StrictModeGetter    *strictModeGetter;  // used to test for strict mode
};
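
// Illustrative sketch of driving the scanner (the |cx|, |options|, |chars| and
// |length| values are assumed to come from the embedding; see the parser for
// real call sites):
//
//   TokenStream ts(cx, options, chars, length, /* smg = */ nullptr);
//   for (;;) {
//       TokenKind tt = ts.getToken();
//       if (tt == TOK_ERROR || tt == TOK_EOF)
//           break;
//       // ... dispatch on tt; inspect ts.currentToken() for details ...
//   }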

// Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
// message have const jschar* type, not const char*.
#define JSREPORT_UC 0x100

} // namespace frontend
} // namespace js

extern JS_FRIEND_API(int)
js_fgets(char *buf, int size, FILE *file);

#ifdef DEBUG
extern const char *
TokenKindToString(js::frontend::TokenKind tt);
#endif

#endif /* frontend_TokenStream_h */