1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/js/src/frontend/TokenStream.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,914 @@ 1.4 +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 1.5 + * vim: set ts=8 sts=4 et sw=4 tw=99: 1.6 + * This Source Code Form is subject to the terms of the Mozilla Public 1.7 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.8 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.9 + 1.10 +#ifndef frontend_TokenStream_h 1.11 +#define frontend_TokenStream_h 1.12 + 1.13 +// JS lexical scanner interface. 1.14 + 1.15 +#include "mozilla/DebugOnly.h" 1.16 +#include "mozilla/PodOperations.h" 1.17 + 1.18 +#include <stdarg.h> 1.19 +#include <stddef.h> 1.20 +#include <stdio.h> 1.21 + 1.22 +#include "jscntxt.h" 1.23 +#include "jspubtd.h" 1.24 + 1.25 +#include "js/Vector.h" 1.26 +#include "vm/RegExpObject.h" 1.27 + 1.28 +namespace js { 1.29 +namespace frontend { 1.30 + 1.31 +// Values of this type are used to index into arrays such as isExprEnding[], 1.32 +// so the first value must be zero. 1.33 +enum TokenKind { 1.34 + TOK_ERROR = 0, // well-known as the only code < EOF 1.35 + TOK_EOF, // end of file 1.36 + TOK_EOL, // end of line; only returned by peekTokenSameLine() 1.37 + TOK_SEMI, // semicolon 1.38 + TOK_COMMA, // comma operator 1.39 + TOK_HOOK, TOK_COLON, // conditional (?:) 1.40 + TOK_INC, TOK_DEC, // increment/decrement (++ --) 1.41 + TOK_DOT, // member operator (.) 1.42 + TOK_TRIPLEDOT, // for rest arguments (...) 
1.43 + TOK_LB, TOK_RB, // left and right brackets 1.44 + TOK_LC, TOK_RC, // left and right curlies (braces) 1.45 + TOK_LP, TOK_RP, // left and right parentheses 1.46 + TOK_NAME, // identifier 1.47 + TOK_NUMBER, // numeric constant 1.48 + TOK_STRING, // string constant 1.49 + TOK_REGEXP, // RegExp constant 1.50 + TOK_TRUE, // true 1.51 + TOK_FALSE, // false 1.52 + TOK_NULL, // null 1.53 + TOK_THIS, // this 1.54 + TOK_FUNCTION, // function keyword 1.55 + TOK_IF, // if keyword 1.56 + TOK_ELSE, // else keyword 1.57 + TOK_SWITCH, // switch keyword 1.58 + TOK_CASE, // case keyword 1.59 + TOK_DEFAULT, // default keyword 1.60 + TOK_WHILE, // while keyword 1.61 + TOK_DO, // do keyword 1.62 + TOK_FOR, // for keyword 1.63 + TOK_BREAK, // break keyword 1.64 + TOK_CONTINUE, // continue keyword 1.65 + TOK_VAR, // var keyword 1.66 + TOK_CONST, // const keyword 1.67 + TOK_WITH, // with keyword 1.68 + TOK_RETURN, // return keyword 1.69 + TOK_NEW, // new keyword 1.70 + TOK_DELETE, // delete keyword 1.71 + TOK_TRY, // try keyword 1.72 + TOK_CATCH, // catch keyword 1.73 + TOK_FINALLY, // finally keyword 1.74 + TOK_THROW, // throw keyword 1.75 + TOK_DEBUGGER, // debugger keyword 1.76 + TOK_YIELD, // yield from generator function 1.77 + TOK_LET, // let keyword 1.78 + TOK_EXPORT, // export keyword 1.79 + TOK_IMPORT, // import keyword 1.80 + TOK_RESERVED, // reserved keywords 1.81 + TOK_STRICT_RESERVED, // reserved keywords in strict mode 1.82 + 1.83 + // The following token types occupy contiguous ranges to enable easy 1.84 + // range-testing. 1.85 + 1.86 + // Binary operators tokens, TOK_OR thru TOK_MOD. These must be in the same 1.87 + // order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h. 
1.88 + TOK_OR, // logical or (||) 1.89 + TOK_BINOP_FIRST = TOK_OR, 1.90 + TOK_AND, // logical and (&&) 1.91 + TOK_BITOR, // bitwise-or (|) 1.92 + TOK_BITXOR, // bitwise-xor (^) 1.93 + TOK_BITAND, // bitwise-and (&) 1.94 + 1.95 + // Equality operation tokens, per TokenKindIsEquality. 1.96 + TOK_STRICTEQ, 1.97 + TOK_EQUALITY_START = TOK_STRICTEQ, 1.98 + TOK_EQ, 1.99 + TOK_STRICTNE, 1.100 + TOK_NE, 1.101 + TOK_EQUALITY_LAST = TOK_NE, 1.102 + 1.103 + // Relational ops (< <= > >=), per TokenKindIsRelational. 1.104 + TOK_LT, 1.105 + TOK_RELOP_START = TOK_LT, 1.106 + TOK_LE, 1.107 + TOK_GT, 1.108 + TOK_GE, 1.109 + TOK_RELOP_LAST = TOK_GE, 1.110 + 1.111 + TOK_INSTANCEOF, // |instanceof| keyword 1.112 + TOK_IN, // |in| keyword 1.113 + 1.114 + // Shift ops (<< >> >>>), per TokenKindIsShift. 1.115 + TOK_LSH, 1.116 + TOK_SHIFTOP_START = TOK_LSH, 1.117 + TOK_RSH, 1.118 + TOK_URSH, 1.119 + TOK_SHIFTOP_LAST = TOK_URSH, 1.120 + 1.121 + TOK_ADD, 1.122 + TOK_SUB, 1.123 + TOK_MUL, 1.124 + TOK_DIV, 1.125 + TOK_MOD, 1.126 + TOK_BINOP_LAST = TOK_MOD, 1.127 + 1.128 + // Unary operation tokens. 
1.129 + TOK_TYPEOF, 1.130 + TOK_VOID, 1.131 + TOK_NOT, 1.132 + TOK_BITNOT, 1.133 + 1.134 + TOK_ARROW, // function arrow (=>) 1.135 + 1.136 + // Assignment ops (= += -= etc.), per TokenKindIsAssignment 1.137 + TOK_ASSIGN, 1.138 + TOK_ASSIGNMENT_START = TOK_ASSIGN, 1.139 + TOK_ADDASSIGN, 1.140 + TOK_SUBASSIGN, 1.141 + TOK_BITORASSIGN, 1.142 + TOK_BITXORASSIGN, 1.143 + TOK_BITANDASSIGN, 1.144 + TOK_LSHASSIGN, 1.145 + TOK_RSHASSIGN, 1.146 + TOK_URSHASSIGN, 1.147 + TOK_MULASSIGN, 1.148 + TOK_DIVASSIGN, 1.149 + TOK_MODASSIGN, 1.150 + TOK_ASSIGNMENT_LAST = TOK_MODASSIGN, 1.151 + 1.152 + TOK_LIMIT // domain size 1.153 +}; 1.154 + 1.155 +inline bool 1.156 +TokenKindIsBinaryOp(TokenKind tt) 1.157 +{ 1.158 + return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST; 1.159 +} 1.160 + 1.161 +inline bool 1.162 +TokenKindIsEquality(TokenKind tt) 1.163 +{ 1.164 + return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST; 1.165 +} 1.166 + 1.167 +inline bool 1.168 +TokenKindIsRelational(TokenKind tt) 1.169 +{ 1.170 + return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST; 1.171 +} 1.172 + 1.173 +inline bool 1.174 +TokenKindIsShift(TokenKind tt) 1.175 +{ 1.176 + return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST; 1.177 +} 1.178 + 1.179 +inline bool 1.180 +TokenKindIsAssignment(TokenKind tt) 1.181 +{ 1.182 + return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST; 1.183 +} 1.184 + 1.185 +inline bool 1.186 +TokenKindIsDecl(TokenKind tt) 1.187 +{ 1.188 + return tt == TOK_VAR || tt == TOK_LET; 1.189 +} 1.190 + 1.191 +struct TokenPos { 1.192 + uint32_t begin; // Offset of the token's first char. 1.193 + uint32_t end; // Offset of 1 past the token's last char. 1.194 + 1.195 + TokenPos() {} 1.196 + TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {} 1.197 + 1.198 + // Return a TokenPos that covers left, right, and anything in between. 
1.199 + static TokenPos box(const TokenPos &left, const TokenPos &right) { 1.200 + JS_ASSERT(left.begin <= left.end); 1.201 + JS_ASSERT(left.end <= right.begin); 1.202 + JS_ASSERT(right.begin <= right.end); 1.203 + return TokenPos(left.begin, right.end); 1.204 + } 1.205 + 1.206 + bool operator==(const TokenPos& bpos) const { 1.207 + return begin == bpos.begin && end == bpos.end; 1.208 + } 1.209 + 1.210 + bool operator!=(const TokenPos& bpos) const { 1.211 + return begin != bpos.begin || end != bpos.end; 1.212 + } 1.213 + 1.214 + bool operator <(const TokenPos& bpos) const { 1.215 + return begin < bpos.begin; 1.216 + } 1.217 + 1.218 + bool operator <=(const TokenPos& bpos) const { 1.219 + return begin <= bpos.begin; 1.220 + } 1.221 + 1.222 + bool operator >(const TokenPos& bpos) const { 1.223 + return !(*this <= bpos); 1.224 + } 1.225 + 1.226 + bool operator >=(const TokenPos& bpos) const { 1.227 + return !(*this < bpos); 1.228 + } 1.229 + 1.230 + bool encloses(const TokenPos& pos) const { 1.231 + return begin <= pos.begin && pos.end <= end; 1.232 + } 1.233 +}; 1.234 + 1.235 +enum DecimalPoint { NoDecimal = false, HasDecimal = true }; 1.236 + 1.237 +struct Token 1.238 +{ 1.239 + TokenKind type; // char value or above enumerator 1.240 + TokenPos pos; // token position in file 1.241 + union { 1.242 + private: 1.243 + friend struct Token; 1.244 + PropertyName *name; // non-numeric atom 1.245 + JSAtom *atom; // potentially-numeric atom 1.246 + struct { 1.247 + double value; // floating point number 1.248 + DecimalPoint decimalPoint; // literal contains '.' 1.249 + } number; 1.250 + RegExpFlag reflags; // regexp flags; use tokenbuf to access 1.251 + // regexp chars 1.252 + } u; 1.253 + 1.254 + // This constructor is necessary only for MSVC 2013 and how it compiles the 1.255 + // initialization of TokenStream::tokens. That field is initialized as 1.256 + // tokens() in the constructor init-list. 
This *should* zero the entire 1.257 + // array, then (because Token has a non-trivial constructor, because 1.258 + // TokenPos has a user-provided constructor) call the implicit Token 1.259 + // constructor on each element, which would call the TokenPos constructor 1.260 + // for Token::pos and do nothing. (All of which is equivalent to just 1.261 + // zeroing TokenStream::tokens.) But MSVC 2013 (2010/2012 don't have this 1.262 + // bug) doesn't zero out each element, so we need this extra constructor to 1.263 + // make it do the right thing. (Token is used primarily by reference or 1.264 + // pointer, and it's only initialized a very few places, so having a 1.265 + // user-defined constructor won't hurt perf.) See also bug 920318. 1.266 + Token() 1.267 + : type(TOK_ERROR), 1.268 + pos(0, 0) 1.269 + { 1.270 + } 1.271 + 1.272 + // Mutators 1.273 + 1.274 + void setName(PropertyName *name) { 1.275 + JS_ASSERT(type == TOK_NAME); 1.276 + JS_ASSERT(!IsPoisonedPtr(name)); 1.277 + u.name = name; 1.278 + } 1.279 + 1.280 + void setAtom(JSAtom *atom) { 1.281 + JS_ASSERT(type == TOK_STRING); 1.282 + JS_ASSERT(!IsPoisonedPtr(atom)); 1.283 + u.atom = atom; 1.284 + } 1.285 + 1.286 + void setRegExpFlags(js::RegExpFlag flags) { 1.287 + JS_ASSERT(type == TOK_REGEXP); 1.288 + JS_ASSERT((flags & AllFlags) == flags); 1.289 + u.reflags = flags; 1.290 + } 1.291 + 1.292 + void setNumber(double n, DecimalPoint decimalPoint) { 1.293 + JS_ASSERT(type == TOK_NUMBER); 1.294 + u.number.value = n; 1.295 + u.number.decimalPoint = decimalPoint; 1.296 + } 1.297 + 1.298 + // Type-safe accessors 1.299 + 1.300 + PropertyName *name() const { 1.301 + JS_ASSERT(type == TOK_NAME); 1.302 + return u.name->asPropertyName(); // poor-man's type verification 1.303 + } 1.304 + 1.305 + JSAtom *atom() const { 1.306 + JS_ASSERT(type == TOK_STRING); 1.307 + return u.atom; 1.308 + } 1.309 + 1.310 + js::RegExpFlag regExpFlags() const { 1.311 + JS_ASSERT(type == TOK_REGEXP); 1.312 + JS_ASSERT((u.reflags & AllFlags) == 
u.reflags); 1.313 + return u.reflags; 1.314 + } 1.315 + 1.316 + double number() const { 1.317 + JS_ASSERT(type == TOK_NUMBER); 1.318 + return u.number.value; 1.319 + } 1.320 + 1.321 + DecimalPoint decimalPoint() const { 1.322 + JS_ASSERT(type == TOK_NUMBER); 1.323 + return u.number.decimalPoint; 1.324 + } 1.325 +}; 1.326 + 1.327 +struct CompileError { 1.328 + JSErrorReport report; 1.329 + char *message; 1.330 + ErrorArgumentsType argumentsType; 1.331 + CompileError() 1.332 + : message(nullptr), argumentsType(ArgumentsAreUnicode) 1.333 + { 1.334 + mozilla::PodZero(&report); 1.335 + } 1.336 + ~CompileError(); 1.337 + void throwError(JSContext *cx); 1.338 + 1.339 + private: 1.340 + // CompileError owns raw allocated memory, so disable assignment and copying 1.341 + // for safety. 1.342 + void operator=(const CompileError &) MOZ_DELETE; 1.343 + CompileError(const CompileError &) MOZ_DELETE; 1.344 +}; 1.345 + 1.346 +// Ideally, tokenizing would be entirely independent of context. But the 1.347 +// strict mode flag, which is in SharedContext, affects tokenizing, and 1.348 +// TokenStream needs to see it. 1.349 +// 1.350 +// This class is a tiny back-channel from TokenStream to the strict mode flag 1.351 +// that avoids exposing the rest of SharedContext to TokenStream. 1.352 +// 1.353 +class StrictModeGetter { 1.354 + public: 1.355 + virtual bool strictMode() = 0; 1.356 +}; 1.357 + 1.358 +// TokenStream is the lexical scanner for Javascript source text. 1.359 +// 1.360 +// It takes a buffer of jschars and linearly scans it into |Token|s. 1.361 +// Internally the class uses a four element circular buffer |tokens| of 1.362 +// |Token|s. As an index for |tokens|, the member |cursor| points to the 1.363 +// current token. 1.364 +// Calls to getToken() increase |cursor| by one and return the new current 1.365 +// token. If a TokenStream was just created, the current token is initialized 1.366 +// with random data (i.e. not initialized). 
It is therefore important that 1.367 +// one of the first four member functions listed below is called first. 1.368 +// The circular buffer lets us go back up to two tokens from the last 1.369 +// scanned token. Internally, the relative number of backward steps that were 1.370 +// taken (via ungetToken()) after the last token was scanned is stored in 1.371 +// |lookahead|. 1.372 +// 1.373 +// The following table lists in which situations it is safe to call each listed 1.374 +// function. No checks are made by the functions in non-debug builds. 1.375 +// 1.376 +// Function Name | Precondition; changes to |lookahead| 1.377 +// ------------------+--------------------------------------------------------- 1.378 +// getToken | none; if |lookahead > 0| then |lookahead--| 1.379 +// peekToken | none; if |lookahead == 0| then |lookahead == 1| 1.380 +// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1| 1.381 +// matchToken | none; if |lookahead > 0| and the match succeeds then 1.382 +// | |lookahead--| 1.383 +// consumeKnownToken | none; if |lookahead > 0| then |lookahead--| 1.384 +// ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++| 1.385 +// 1.386 +// The behavior of the token scanning process (see getTokenInternal()) can be 1.387 +// modified by calling one of the first four above listed member functions with 1.388 +// an optional argument of type Modifier. However, the modifier will be 1.389 +// ignored unless |lookahead == 0| holds. Due to constraints of the grammar, 1.390 +// this turns out not to be a problem in practice. See the 1.391 +// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?' 1.392 +// for more details: 1.393 +// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E). 1.394 +// 1.395 +// The methods seek() and tell() allow rescanning from a previously visited 1.396 +// location of the buffer. 
1.397 +// 1.398 +class MOZ_STACK_CLASS TokenStream 1.399 +{ 1.400 + // Unicode separators that are treated as line terminators, in addition to \n, \r. 1.401 + enum { 1.402 + LINE_SEPARATOR = 0x2028, 1.403 + PARA_SEPARATOR = 0x2029 1.404 + }; 1.405 + 1.406 + static const size_t ntokens = 4; // 1 current + 2 lookahead, rounded 1.407 + // to power of 2 to avoid divmod by 3 1.408 + static const unsigned maxLookahead = 2; 1.409 + static const unsigned ntokensMask = ntokens - 1; 1.410 + 1.411 + public: 1.412 + typedef Vector<jschar, 32> CharBuffer; 1.413 + 1.414 + TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options, 1.415 + const jschar *base, size_t length, StrictModeGetter *smg); 1.416 + 1.417 + ~TokenStream(); 1.418 + 1.419 + // Accessors. 1.420 + const Token &currentToken() const { return tokens[cursor]; } 1.421 + bool isCurrentTokenType(TokenKind type) const { 1.422 + return currentToken().type == type; 1.423 + } 1.424 + const CharBuffer &getTokenbuf() const { return tokenbuf; } 1.425 + const char *getFilename() const { return filename; } 1.426 + unsigned getLineno() const { return lineno; } 1.427 + unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; } 1.428 + JSPrincipals *getOriginPrincipals() const { return originPrincipals; } 1.429 + JSVersion versionNumber() const { return VersionNumber(options().version); } 1.430 + JSVersion versionWithFlags() const { return options().version; } 1.431 + 1.432 + PropertyName *currentName() const { 1.433 + if (isCurrentTokenType(TOK_YIELD)) 1.434 + return cx->names().yield; 1.435 + JS_ASSERT(isCurrentTokenType(TOK_NAME)); 1.436 + return currentToken().name(); 1.437 + } 1.438 + 1.439 + bool isCurrentTokenAssignment() const { 1.440 + return TokenKindIsAssignment(currentToken().type); 1.441 + } 1.442 + 1.443 + // Flag methods. 
1.444 + bool isEOF() const { return flags.isEOF; } 1.445 + bool sawOctalEscape() const { return flags.sawOctalEscape; } 1.446 + bool hadError() const { return flags.hadError; } 1.447 + 1.448 + // TokenStream-specific error reporters. 1.449 + bool reportError(unsigned errorNumber, ...); 1.450 + bool reportWarning(unsigned errorNumber, ...); 1.451 + 1.452 + static const uint32_t NoOffset = UINT32_MAX; 1.453 + 1.454 + // General-purpose error reporters. You should avoid calling these 1.455 + // directly, and instead use the more succinct alternatives (e.g. 1.456 + // reportError()) in TokenStream, Parser, and BytecodeEmitter. 1.457 + bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, 1.458 + va_list args); 1.459 + bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, 1.460 + va_list args); 1.461 + bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, 1.462 + va_list args); 1.463 + 1.464 + // asm.js reporter 1.465 + void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...); 1.466 + 1.467 + private: 1.468 + // These are private because they should only be called by the tokenizer 1.469 + // while tokenizing not by, for example, BytecodeEmitter. 1.470 + bool reportStrictModeError(unsigned errorNumber, ...); 1.471 + bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); } 1.472 + 1.473 + void onError(); 1.474 + static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb); 1.475 + bool putIdentInTokenbuf(const jschar *identStart); 1.476 + 1.477 + struct Flags 1.478 + { 1.479 + bool isEOF:1; // Hit end of file. 1.480 + bool isDirtyLine:1; // Non-whitespace since start of line. 1.481 + bool sawOctalEscape:1; // Saw an octal character escape. 1.482 + bool hadError:1; // Returned TOK_ERROR from getToken. 
1.483 + 1.484 + Flags() 1.485 + : isEOF(), isDirtyLine(), sawOctalEscape(), hadError() 1.486 + {} 1.487 + }; 1.488 + 1.489 + public: 1.490 + // Sometimes the parser needs to modify how tokens are created. 1.491 + enum Modifier 1.492 + { 1.493 + None, // Normal operation. 1.494 + Operand, // Looking for an operand, not an operator. In 1.495 + // practice, this means that when '/' is seen, 1.496 + // we look for a regexp instead of just returning 1.497 + // TOK_DIV. 1.498 + KeywordIsName, // Treat keywords as names by returning TOK_NAME. 1.499 + }; 1.500 + 1.501 + // Get the next token from the stream, make it the current token, and 1.502 + // return its kind. 1.503 + TokenKind getToken(Modifier modifier = None) { 1.504 + // Check for a pushed-back token resulting from mismatching lookahead. 1.505 + if (lookahead != 0) { 1.506 + lookahead--; 1.507 + cursor = (cursor + 1) & ntokensMask; 1.508 + TokenKind tt = currentToken().type; 1.509 + JS_ASSERT(tt != TOK_EOL); 1.510 + return tt; 1.511 + } 1.512 + 1.513 + return getTokenInternal(modifier); 1.514 + } 1.515 + 1.516 + // Push the last scanned token back into the stream. 
1.517 + void ungetToken() { 1.518 + JS_ASSERT(lookahead < maxLookahead); 1.519 + lookahead++; 1.520 + cursor = (cursor - 1) & ntokensMask; 1.521 + } 1.522 + 1.523 + TokenKind peekToken(Modifier modifier = None) { 1.524 + if (lookahead != 0) 1.525 + return tokens[(cursor + 1) & ntokensMask].type; 1.526 + TokenKind tt = getTokenInternal(modifier); 1.527 + ungetToken(); 1.528 + return tt; 1.529 + } 1.530 + 1.531 + TokenPos peekTokenPos(Modifier modifier = None) { 1.532 + if (lookahead != 0) 1.533 + return tokens[(cursor + 1) & ntokensMask].pos; 1.534 + getTokenInternal(modifier); 1.535 + ungetToken(); 1.536 + JS_ASSERT(lookahead != 0); 1.537 + return tokens[(cursor + 1) & ntokensMask].pos; 1.538 + } 1.539 + 1.540 + // This is like peekToken(), with one exception: if there is an EOL 1.541 + // between the end of the current token and the start of the next token, it 1.542 + // returns TOK_EOL. In that case, no token with TOK_EOL is actually 1.543 + // created, just a TOK_EOL TokenKind is returned, and currentToken() 1.544 + // shouldn't be consulted. (This is the only place TOK_EOL is produced.) 1.545 + MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) { 1.546 + const Token &curr = currentToken(); 1.547 + 1.548 + // If lookahead != 0, we have scanned ahead at least one token, and 1.549 + // |lineno| is the line that the furthest-scanned token ends on. If 1.550 + // it's the same as the line that the current token ends on, that's a 1.551 + // stronger condition than what we are looking for, and we don't need 1.552 + // to return TOK_EOL. 1.553 + if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno)) 1.554 + return tokens[(cursor + 1) & ntokensMask].type; 1.555 + 1.556 + // The above check misses two cases where we don't have to return 1.557 + // TOK_EOL. 1.558 + // - The next token starts on the same line, but is a multi-line token. 
1.559 + // - The next token starts on the same line, but lookahead==2 and there 1.560 + // is a newline between the next token and the one after that. 1.561 + // The following test is somewhat expensive but gets these cases (and 1.562 + // all others) right. 1.563 + (void)getToken(modifier); 1.564 + const Token &next = currentToken(); 1.565 + ungetToken(); 1.566 + return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin) 1.567 + ? next.type 1.568 + : TOK_EOL; 1.569 + } 1.570 + 1.571 + // Get the next token from the stream if its kind is |tt|. 1.572 + bool matchToken(TokenKind tt, Modifier modifier = None) { 1.573 + if (getToken(modifier) == tt) 1.574 + return true; 1.575 + ungetToken(); 1.576 + return false; 1.577 + } 1.578 + 1.579 + void consumeKnownToken(TokenKind tt) { 1.580 + JS_ALWAYS_TRUE(matchToken(tt)); 1.581 + } 1.582 + 1.583 + bool matchContextualKeyword(Handle<PropertyName*> keyword) { 1.584 + if (getToken() == TOK_NAME && currentToken().name() == keyword) 1.585 + return true; 1.586 + ungetToken(); 1.587 + return false; 1.588 + } 1.589 + 1.590 + bool nextTokenEndsExpr() { 1.591 + return isExprEnding[peekToken()]; 1.592 + } 1.593 + 1.594 + class MOZ_STACK_CLASS Position { 1.595 + public: 1.596 + // The Token fields may contain pointers to atoms, so for correct 1.597 + // rooting we must ensure collection of atoms is disabled while objects 1.598 + // of this class are live. Do this by requiring a dummy AutoKeepAtoms 1.599 + // reference in the constructor. 1.600 + // 1.601 + // This class is explicitly ignored by the analysis, so don't add any 1.602 + // more pointers to GC things here! 
1.603 + Position(AutoKeepAtoms&) { } 1.604 + private: 1.605 + Position(const Position&) MOZ_DELETE; 1.606 + friend class TokenStream; 1.607 + const jschar *buf; 1.608 + Flags flags; 1.609 + unsigned lineno; 1.610 + const jschar *linebase; 1.611 + const jschar *prevLinebase; 1.612 + Token currentToken; 1.613 + unsigned lookahead; 1.614 + Token lookaheadTokens[maxLookahead]; 1.615 + }; 1.616 + 1.617 + void advance(size_t position); 1.618 + void tell(Position *); 1.619 + void seek(const Position &pos); 1.620 + bool seek(const Position &pos, const TokenStream &other); 1.621 + 1.622 + size_t positionToOffset(const Position &pos) const { 1.623 + return pos.buf - userbuf.base(); 1.624 + } 1.625 + 1.626 + const jschar *rawBase() const { 1.627 + return userbuf.base(); 1.628 + } 1.629 + 1.630 + const jschar *rawLimit() const { 1.631 + return userbuf.limit(); 1.632 + } 1.633 + 1.634 + bool hasDisplayURL() const { 1.635 + return displayURL_ != nullptr; 1.636 + } 1.637 + 1.638 + jschar *displayURL() { 1.639 + return displayURL_; 1.640 + } 1.641 + 1.642 + bool hasSourceMapURL() const { 1.643 + return sourceMapURL_ != nullptr; 1.644 + } 1.645 + 1.646 + jschar *sourceMapURL() { 1.647 + return sourceMapURL_; 1.648 + } 1.649 + 1.650 + // If the name at s[0:length] is not a keyword in this version, return 1.651 + // true with *ttp unchanged. 1.652 + // 1.653 + // If it is a reserved word in this version and strictness mode, and thus 1.654 + // can't be present in correct code, report a SyntaxError and return false. 1.655 + // 1.656 + // If it is a keyword, like "if", the behavior depends on ttp. If ttp is 1.657 + // null, report a SyntaxError ("if is a reserved identifier") and return 1.658 + // false. If ttp is non-null, return true with the keyword's TokenKind in 1.659 + // *ttp. 
1.660 + bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp); 1.661 + 1.662 + // This class maps a userbuf offset (which is 0-indexed) to a line number 1.663 + // (which is 1-indexed) and a column index (which is 0-indexed). 1.664 + class SourceCoords 1.665 + { 1.666 + // For a given buffer holding source code, |lineStartOffsets_| has one 1.667 + // element per line of source code, plus one sentinel element. Each 1.668 + // non-sentinel element holds the buffer offset for the start of the 1.669 + // corresponding line of source code. For this example script: 1.670 + // 1.671 + // 1 // xyz [line starts at offset 0] 1.672 + // 2 var x; [line starts at offset 7] 1.673 + // 3 [line starts at offset 14] 1.674 + // 4 var y; [line starts at offset 15] 1.675 + // 1.676 + // |lineStartOffsets_| is: 1.677 + // 1.678 + // [0, 7, 14, 15, MAX_PTR] 1.679 + // 1.680 + // To convert a "line number" to a "line index" (i.e. an index into 1.681 + // |lineStartOffsets_|), subtract |initialLineNum_|. E.g. line 3's 1.682 + // line index is (3 - initialLineNum_), which is 2. Therefore 1.683 + // lineStartOffsets_[2] holds the buffer offset for the start of line 3, 1.684 + // which is 14. (Note that |initialLineNum_| is often 1, but not 1.685 + // always.) 1.686 + // 1.687 + // The first element is always 0, and the last element is always the 1.688 + // MAX_PTR sentinel. 1.689 + // 1.690 + // offset-to-line/column lookups are O(log n) in the worst case (binary 1.691 + // search), but in practice they're heavily clustered and we do better 1.692 + // than that by using the previous lookup's result (lastLineIndex_) as 1.693 + // a starting point. 1.694 + // 1.695 + // Checking if an offset lies within a particular line number 1.696 + // (isOnThisLine()) is O(1). 
1.697 + // 1.698 + Vector<uint32_t, 128> lineStartOffsets_; 1.699 + uint32_t initialLineNum_; 1.700 + 1.701 + // This is mutable because it's modified on every search, but that fact 1.702 + // isn't visible outside this class. 1.703 + mutable uint32_t lastLineIndex_; 1.704 + 1.705 + uint32_t lineIndexOf(uint32_t offset) const; 1.706 + 1.707 + static const uint32_t MAX_PTR = UINT32_MAX; 1.708 + 1.709 + uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; } 1.710 + uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; } 1.711 + 1.712 + public: 1.713 + SourceCoords(ExclusiveContext *cx, uint32_t ln); 1.714 + 1.715 + void add(uint32_t lineNum, uint32_t lineStartOffset); 1.716 + bool fill(const SourceCoords &other); 1.717 + 1.718 + bool isOnThisLine(uint32_t offset, uint32_t lineNum) const { 1.719 + uint32_t lineIndex = lineNumToIndex(lineNum); 1.720 + JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length()); // +1 due to sentinel 1.721 + return lineStartOffsets_[lineIndex] <= offset && 1.722 + offset < lineStartOffsets_[lineIndex + 1]; 1.723 + } 1.724 + 1.725 + uint32_t lineNum(uint32_t offset) const; 1.726 + uint32_t columnIndex(uint32_t offset) const; 1.727 + void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const; 1.728 + }; 1.729 + 1.730 + SourceCoords srcCoords; 1.731 + 1.732 + JSAtomState &names() const { 1.733 + return cx->names(); 1.734 + } 1.735 + 1.736 + ExclusiveContext *context() const { 1.737 + return cx; 1.738 + } 1.739 + 1.740 + const ReadOnlyCompileOptions &options() const { 1.741 + return options_; 1.742 + } 1.743 + 1.744 + private: 1.745 + // This is the low-level interface to the JS source code buffer. It just 1.746 + // gets raw chars, basically. TokenStreams functions are layered on top 1.747 + // and do some extra stuff like converting all EOL sequences to '\n', 1.748 + // tracking the line number, and setting |flags.isEOF|. 
(The "raw" in "raw 1.749 + // chars" refers to the lack of EOL sequence normalization.) 1.750 + class TokenBuf { 1.751 + public: 1.752 + TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length) 1.753 + : base_(buf), limit_(buf + length), ptr(buf) 1.754 + { } 1.755 + 1.756 + bool hasRawChars() const { 1.757 + return ptr < limit_; 1.758 + } 1.759 + 1.760 + bool atStart() const { 1.761 + return ptr == base_; 1.762 + } 1.763 + 1.764 + const jschar *base() const { 1.765 + return base_; 1.766 + } 1.767 + 1.768 + const jschar *limit() const { 1.769 + return limit_; 1.770 + } 1.771 + 1.772 + jschar getRawChar() { 1.773 + return *ptr++; // this will nullptr-crash if poisoned 1.774 + } 1.775 + 1.776 + jschar peekRawChar() const { 1.777 + return *ptr; // this will nullptr-crash if poisoned 1.778 + } 1.779 + 1.780 + bool matchRawChar(jschar c) { 1.781 + if (*ptr == c) { // this will nullptr-crash if poisoned 1.782 + ptr++; 1.783 + return true; 1.784 + } 1.785 + return false; 1.786 + } 1.787 + 1.788 + bool matchRawCharBackwards(jschar c) { 1.789 + JS_ASSERT(ptr); // make sure it hasn't been poisoned 1.790 + if (*(ptr - 1) == c) { 1.791 + ptr--; 1.792 + return true; 1.793 + } 1.794 + return false; 1.795 + } 1.796 + 1.797 + void ungetRawChar() { 1.798 + JS_ASSERT(ptr); // make sure it hasn't been poisoned 1.799 + ptr--; 1.800 + } 1.801 + 1.802 + const jschar *addressOfNextRawChar(bool allowPoisoned = false) const { 1.803 + JS_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned 1.804 + return ptr; 1.805 + } 1.806 + 1.807 + // Use this with caution! 1.808 + void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) { 1.809 + JS_ASSERT_IF(!allowPoisoned, a); 1.810 + ptr = a; 1.811 + } 1.812 + 1.813 +#ifdef DEBUG 1.814 + // Poison the TokenBuf so it cannot be accessed again. 
1.815 + void poison() { 1.816 + ptr = nullptr; 1.817 + } 1.818 +#endif 1.819 + 1.820 + static bool isRawEOLChar(int32_t c) { 1.821 + return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR; 1.822 + } 1.823 + 1.824 + // Finds the next EOL, but stops once 'max' jschars have been scanned 1.825 + // (*including* the starting jschar). 1.826 + const jschar *findEOLMax(const jschar *p, size_t max); 1.827 + 1.828 + private: 1.829 + const jschar *base_; // base of buffer 1.830 + const jschar *limit_; // limit for quick bounds check 1.831 + const jschar *ptr; // next char to get 1.832 + }; 1.833 + 1.834 + TokenKind getTokenInternal(Modifier modifier); 1.835 + 1.836 + int32_t getChar(); 1.837 + int32_t getCharIgnoreEOL(); 1.838 + void ungetChar(int32_t c); 1.839 + void ungetCharIgnoreEOL(int32_t c); 1.840 + Token *newToken(ptrdiff_t adjust); 1.841 + bool peekUnicodeEscape(int32_t *c); 1.842 + bool matchUnicodeEscapeIdStart(int32_t *c); 1.843 + bool matchUnicodeEscapeIdent(int32_t *c); 1.844 + bool peekChars(int n, jschar *cp); 1.845 + 1.846 + bool getDirectives(bool isMultiline, bool shouldWarnDeprecated); 1.847 + bool getDirective(bool isMultiline, bool shouldWarnDeprecated, 1.848 + const char *directive, int directiveLength, 1.849 + const char *errorMsgPragma, jschar **destination); 1.850 + bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated); 1.851 + bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated); 1.852 + 1.853 + // |expect| cannot be an EOL char. 
1.854 + bool matchChar(int32_t expect) { 1.855 + MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect)); 1.856 + return MOZ_LIKELY(userbuf.hasRawChars()) && 1.857 + userbuf.matchRawChar(expect); 1.858 + } 1.859 + 1.860 + void consumeKnownChar(int32_t expect) { 1.861 + mozilla::DebugOnly<int32_t> c = getChar(); 1.862 + JS_ASSERT(c == expect); 1.863 + } 1.864 + 1.865 + int32_t peekChar() { 1.866 + int32_t c = getChar(); 1.867 + ungetChar(c); 1.868 + return c; 1.869 + } 1.870 + 1.871 + void skipChars(int n) { 1.872 + while (--n >= 0) 1.873 + getChar(); 1.874 + } 1.875 + 1.876 + void updateLineInfoForEOL(); 1.877 + void updateFlagsForEOL(); 1.878 + 1.879 + // Options used for parsing/tokenizing. 1.880 + const ReadOnlyCompileOptions &options_; 1.881 + 1.882 + Token tokens[ntokens]; // circular token buffer 1.883 + unsigned cursor; // index of last parsed token 1.884 + unsigned lookahead; // count of lookahead tokens 1.885 + unsigned lineno; // current line number 1.886 + Flags flags; // flags -- see above 1.887 + const jschar *linebase; // start of current line; points into userbuf 1.888 + const jschar *prevLinebase; // start of previous line; nullptr if on the first line 1.889 + TokenBuf userbuf; // user input buffer 1.890 + const char *filename; // input filename or null 1.891 + jschar *displayURL_; // the user's requested source URL or null 1.892 + jschar *sourceMapURL_; // source map's filename or null 1.893 + CharBuffer tokenbuf; // current token string buffer 1.894 + bool maybeEOL[256]; // probabilistic EOL lookup table 1.895 + bool maybeStrSpecial[256]; // speeds up string scanning 1.896 + uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs? 
1.897 + ExclusiveContext *const cx; 1.898 + JSPrincipals *const originPrincipals; 1.899 + StrictModeGetter *strictModeGetter; // used to test for strict mode 1.900 +}; 1.901 + 1.902 +// Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error 1.903 +// message have const jschar* type, not const char*. 1.904 +#define JSREPORT_UC 0x100 1.905 + 1.906 +} // namespace frontend 1.907 +} // namespace js 1.908 + 1.909 +extern JS_FRIEND_API(int) 1.910 +js_fgets(char *buf, int size, FILE *file); 1.911 + 1.912 +#ifdef DEBUG 1.913 +extern const char * 1.914 +TokenKindToString(js::frontend::TokenKind tt); 1.915 +#endif 1.916 + 1.917 +#endif /* frontend_TokenStream_h */