js/src/frontend/TokenStream.h

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0 3 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 #ifndef frontend_TokenStream_h
michael@0 8 #define frontend_TokenStream_h
michael@0 9
michael@0 10 // JS lexical scanner interface.
michael@0 11
michael@0 12 #include "mozilla/DebugOnly.h"
michael@0 13 #include "mozilla/PodOperations.h"
michael@0 14
michael@0 15 #include <stdarg.h>
michael@0 16 #include <stddef.h>
michael@0 17 #include <stdio.h>
michael@0 18
michael@0 19 #include "jscntxt.h"
michael@0 20 #include "jspubtd.h"
michael@0 21
michael@0 22 #include "js/Vector.h"
michael@0 23 #include "vm/RegExpObject.h"
michael@0 24
michael@0 25 namespace js {
michael@0 26 namespace frontend {
michael@0 27
michael@0 28 // Values of this type are used to index into arrays such as isExprEnding[],
michael@0 29 // so the first value must be zero.
enum TokenKind {
    // ---- Sentinels, punctuation and literals ----
    TOK_ERROR = 0,              // well-known as the only code < EOF
    TOK_EOF,                    // end of file
    TOK_EOL,                    // end of line; only returned by peekTokenSameLine()
    TOK_SEMI,                   // semicolon
    TOK_COMMA,                  // comma operator
    TOK_HOOK,                   // conditional (?:), first half
    TOK_COLON,                  // conditional (?:), second half
    TOK_INC,                    // increment (++)
    TOK_DEC,                    // decrement (--)
    TOK_DOT,                    // member operator (.)
    TOK_TRIPLEDOT,              // for rest arguments (...)
    TOK_LB,                     // left bracket
    TOK_RB,                     // right bracket
    TOK_LC,                     // left curly (brace)
    TOK_RC,                     // right curly (brace)
    TOK_LP,                     // left parenthesis
    TOK_RP,                     // right parenthesis
    TOK_NAME,                   // identifier
    TOK_NUMBER,                 // numeric constant
    TOK_STRING,                 // string constant
    TOK_REGEXP,                 // RegExp constant
    TOK_TRUE,                   // true
    TOK_FALSE,                  // false
    TOK_NULL,                   // null
    TOK_THIS,                   // this

    // ---- Keywords ----
    TOK_FUNCTION,               // function keyword
    TOK_IF,                     // if keyword
    TOK_ELSE,                   // else keyword
    TOK_SWITCH,                 // switch keyword
    TOK_CASE,                   // case keyword
    TOK_DEFAULT,                // default keyword
    TOK_WHILE,                  // while keyword
    TOK_DO,                     // do keyword
    TOK_FOR,                    // for keyword
    TOK_BREAK,                  // break keyword
    TOK_CONTINUE,               // continue keyword
    TOK_VAR,                    // var keyword
    TOK_CONST,                  // const keyword
    TOK_WITH,                   // with keyword
    TOK_RETURN,                 // return keyword
    TOK_NEW,                    // new keyword
    TOK_DELETE,                 // delete keyword
    TOK_TRY,                    // try keyword
    TOK_CATCH,                  // catch keyword
    TOK_FINALLY,                // finally keyword
    TOK_THROW,                  // throw keyword
    TOK_DEBUGGER,               // debugger keyword
    TOK_YIELD,                  // yield from generator function
    TOK_LET,                    // let keyword
    TOK_EXPORT,                 // export keyword
    TOK_IMPORT,                 // import keyword
    TOK_RESERVED,               // reserved keywords
    TOK_STRICT_RESERVED,        // reserved keywords in strict mode

    // Every group from here on occupies a contiguous run of values so that
    // membership can be decided with a simple range comparison.

    // ---- Binary operator tokens: TOK_OR through TOK_MOD ----
    // The order must match F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in
    // ParseNode.h.
    TOK_OR,                     // logical or (||)
    TOK_BINOP_FIRST = TOK_OR,
    TOK_AND,                    // logical and (&&)
    TOK_BITOR,                  // bitwise-or (|)
    TOK_BITXOR,                 // bitwise-xor (^)
    TOK_BITAND,                 // bitwise-and (&)

    // Equality operators; see TokenKindIsEquality.
    TOK_STRICTEQ,
    TOK_EQUALITY_START = TOK_STRICTEQ,
    TOK_EQ,
    TOK_STRICTNE,
    TOK_NE,
    TOK_EQUALITY_LAST = TOK_NE,

    // Relational operators (< <= > >=); see TokenKindIsRelational.
    TOK_LT,
    TOK_RELOP_START = TOK_LT,
    TOK_LE,
    TOK_GT,
    TOK_GE,
    TOK_RELOP_LAST = TOK_GE,

    TOK_INSTANCEOF,             // |instanceof| keyword
    TOK_IN,                     // |in| keyword

    // Shift operators (<< >> >>>); see TokenKindIsShift.
    TOK_LSH,
    TOK_SHIFTOP_START = TOK_LSH,
    TOK_RSH,
    TOK_URSH,
    TOK_SHIFTOP_LAST = TOK_URSH,

    TOK_ADD,
    TOK_SUB,
    TOK_MUL,
    TOK_DIV,
    TOK_MOD,
    TOK_BINOP_LAST = TOK_MOD,

    // ---- Unary operator tokens ----
    TOK_TYPEOF,
    TOK_VOID,
    TOK_NOT,
    TOK_BITNOT,

    TOK_ARROW,                  // function arrow (=>)

    // ---- Assignment operators (= += -= etc.); see TokenKindIsAssignment ----
    TOK_ASSIGN,
    TOK_ASSIGNMENT_START = TOK_ASSIGN,
    TOK_ADDASSIGN,
    TOK_SUBASSIGN,
    TOK_BITORASSIGN,
    TOK_BITXORASSIGN,
    TOK_BITANDASSIGN,
    TOK_LSHASSIGN,
    TOK_RSHASSIGN,
    TOK_URSHASSIGN,
    TOK_MULASSIGN,
    TOK_DIVASSIGN,
    TOK_MODASSIGN,
    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

    TOK_LIMIT                   // domain size
};
michael@0 151
michael@0 152 inline bool
michael@0 153 TokenKindIsBinaryOp(TokenKind tt)
michael@0 154 {
michael@0 155 return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST;
michael@0 156 }
michael@0 157
michael@0 158 inline bool
michael@0 159 TokenKindIsEquality(TokenKind tt)
michael@0 160 {
michael@0 161 return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST;
michael@0 162 }
michael@0 163
michael@0 164 inline bool
michael@0 165 TokenKindIsRelational(TokenKind tt)
michael@0 166 {
michael@0 167 return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST;
michael@0 168 }
michael@0 169
michael@0 170 inline bool
michael@0 171 TokenKindIsShift(TokenKind tt)
michael@0 172 {
michael@0 173 return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST;
michael@0 174 }
michael@0 175
michael@0 176 inline bool
michael@0 177 TokenKindIsAssignment(TokenKind tt)
michael@0 178 {
michael@0 179 return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST;
michael@0 180 }
michael@0 181
michael@0 182 inline bool
michael@0 183 TokenKindIsDecl(TokenKind tt)
michael@0 184 {
michael@0 185 return tt == TOK_VAR || tt == TOK_LET;
michael@0 186 }
michael@0 187
michael@0 188 struct TokenPos {
michael@0 189 uint32_t begin; // Offset of the token's first char.
michael@0 190 uint32_t end; // Offset of 1 past the token's last char.
michael@0 191
michael@0 192 TokenPos() {}
michael@0 193 TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}
michael@0 194
michael@0 195 // Return a TokenPos that covers left, right, and anything in between.
michael@0 196 static TokenPos box(const TokenPos &left, const TokenPos &right) {
michael@0 197 JS_ASSERT(left.begin <= left.end);
michael@0 198 JS_ASSERT(left.end <= right.begin);
michael@0 199 JS_ASSERT(right.begin <= right.end);
michael@0 200 return TokenPos(left.begin, right.end);
michael@0 201 }
michael@0 202
michael@0 203 bool operator==(const TokenPos& bpos) const {
michael@0 204 return begin == bpos.begin && end == bpos.end;
michael@0 205 }
michael@0 206
michael@0 207 bool operator!=(const TokenPos& bpos) const {
michael@0 208 return begin != bpos.begin || end != bpos.end;
michael@0 209 }
michael@0 210
michael@0 211 bool operator <(const TokenPos& bpos) const {
michael@0 212 return begin < bpos.begin;
michael@0 213 }
michael@0 214
michael@0 215 bool operator <=(const TokenPos& bpos) const {
michael@0 216 return begin <= bpos.begin;
michael@0 217 }
michael@0 218
michael@0 219 bool operator >(const TokenPos& bpos) const {
michael@0 220 return !(*this <= bpos);
michael@0 221 }
michael@0 222
michael@0 223 bool operator >=(const TokenPos& bpos) const {
michael@0 224 return !(*this < bpos);
michael@0 225 }
michael@0 226
michael@0 227 bool encloses(const TokenPos& pos) const {
michael@0 228 return begin <= pos.begin && pos.end <= end;
michael@0 229 }
michael@0 230 };
michael@0 231
// Two-state tag whose enumerators are pinned to the values of |false| and
// |true| respectively.
enum DecimalPoint {
    NoDecimal = false,
    HasDecimal = true
};
michael@0 233
michael@0 234 struct Token
michael@0 235 {
michael@0 236 TokenKind type; // char value or above enumerator
michael@0 237 TokenPos pos; // token position in file
michael@0 238 union {
michael@0 239 private:
michael@0 240 friend struct Token;
michael@0 241 PropertyName *name; // non-numeric atom
michael@0 242 JSAtom *atom; // potentially-numeric atom
michael@0 243 struct {
michael@0 244 double value; // floating point number
michael@0 245 DecimalPoint decimalPoint; // literal contains '.'
michael@0 246 } number;
michael@0 247 RegExpFlag reflags; // regexp flags; use tokenbuf to access
michael@0 248 // regexp chars
michael@0 249 } u;
michael@0 250
michael@0 251 // This constructor is necessary only for MSVC 2013 and how it compiles the
michael@0 252 // initialization of TokenStream::tokens. That field is initialized as
michael@0 253 // tokens() in the constructor init-list. This *should* zero the entire
michael@0 254 // array, then (because Token has a non-trivial constructor, because
michael@0 255 // TokenPos has a user-provided constructor) call the implicit Token
michael@0 256 // constructor on each element, which would call the TokenPos constructor
michael@0 257 // for Token::pos and do nothing. (All of which is equivalent to just
michael@0 258 // zeroing TokenStream::tokens.) But MSVC 2013 (2010/2012 don't have this
michael@0 259 // bug) doesn't zero out each element, so we need this extra constructor to
michael@0 260 // make it do the right thing. (Token is used primarily by reference or
michael@0 261 // pointer, and it's only initialized a very few places, so having a
michael@0 262 // user-defined constructor won't hurt perf.) See also bug 920318.
michael@0 263 Token()
michael@0 264 : type(TOK_ERROR),
michael@0 265 pos(0, 0)
michael@0 266 {
michael@0 267 }
michael@0 268
michael@0 269 // Mutators
michael@0 270
michael@0 271 void setName(PropertyName *name) {
michael@0 272 JS_ASSERT(type == TOK_NAME);
michael@0 273 JS_ASSERT(!IsPoisonedPtr(name));
michael@0 274 u.name = name;
michael@0 275 }
michael@0 276
michael@0 277 void setAtom(JSAtom *atom) {
michael@0 278 JS_ASSERT(type == TOK_STRING);
michael@0 279 JS_ASSERT(!IsPoisonedPtr(atom));
michael@0 280 u.atom = atom;
michael@0 281 }
michael@0 282
michael@0 283 void setRegExpFlags(js::RegExpFlag flags) {
michael@0 284 JS_ASSERT(type == TOK_REGEXP);
michael@0 285 JS_ASSERT((flags & AllFlags) == flags);
michael@0 286 u.reflags = flags;
michael@0 287 }
michael@0 288
michael@0 289 void setNumber(double n, DecimalPoint decimalPoint) {
michael@0 290 JS_ASSERT(type == TOK_NUMBER);
michael@0 291 u.number.value = n;
michael@0 292 u.number.decimalPoint = decimalPoint;
michael@0 293 }
michael@0 294
michael@0 295 // Type-safe accessors
michael@0 296
michael@0 297 PropertyName *name() const {
michael@0 298 JS_ASSERT(type == TOK_NAME);
michael@0 299 return u.name->asPropertyName(); // poor-man's type verification
michael@0 300 }
michael@0 301
michael@0 302 JSAtom *atom() const {
michael@0 303 JS_ASSERT(type == TOK_STRING);
michael@0 304 return u.atom;
michael@0 305 }
michael@0 306
michael@0 307 js::RegExpFlag regExpFlags() const {
michael@0 308 JS_ASSERT(type == TOK_REGEXP);
michael@0 309 JS_ASSERT((u.reflags & AllFlags) == u.reflags);
michael@0 310 return u.reflags;
michael@0 311 }
michael@0 312
michael@0 313 double number() const {
michael@0 314 JS_ASSERT(type == TOK_NUMBER);
michael@0 315 return u.number.value;
michael@0 316 }
michael@0 317
michael@0 318 DecimalPoint decimalPoint() const {
michael@0 319 JS_ASSERT(type == TOK_NUMBER);
michael@0 320 return u.number.decimalPoint;
michael@0 321 }
michael@0 322 };
michael@0 323
michael@0 324 struct CompileError {
michael@0 325 JSErrorReport report;
michael@0 326 char *message;
michael@0 327 ErrorArgumentsType argumentsType;
michael@0 328 CompileError()
michael@0 329 : message(nullptr), argumentsType(ArgumentsAreUnicode)
michael@0 330 {
michael@0 331 mozilla::PodZero(&report);
michael@0 332 }
michael@0 333 ~CompileError();
michael@0 334 void throwError(JSContext *cx);
michael@0 335
michael@0 336 private:
michael@0 337 // CompileError owns raw allocated memory, so disable assignment and copying
michael@0 338 // for safety.
michael@0 339 void operator=(const CompileError &) MOZ_DELETE;
michael@0 340 CompileError(const CompileError &) MOZ_DELETE;
michael@0 341 };
michael@0 342
michael@0 343 // Ideally, tokenizing would be entirely independent of context. But the
michael@0 344 // strict mode flag, which is in SharedContext, affects tokenizing, and
michael@0 345 // TokenStream needs to see it.
michael@0 346 //
michael@0 347 // This class is a tiny back-channel from TokenStream to the strict mode flag
michael@0 348 // that avoids exposing the rest of SharedContext to TokenStream.
michael@0 349 //
// Abstract interface through which TokenStream asks whether strict mode is
// currently in effect.
class StrictModeGetter {
  public:
    // Virtual so that an implementation destroyed through a
    // StrictModeGetter pointer runs the derived destructor instead of
    // invoking undefined behaviour.
    virtual ~StrictModeGetter() {}

    // Returns true when strict mode is in effect.
    virtual bool strictMode() = 0;
};
michael@0 354
michael@0 355 // TokenStream is the lexical scanner for Javascript source text.
michael@0 356 //
michael@0 357 // It takes a buffer of jschars and linearly scans it into |Token|s.
michael@0 358 // Internally the class uses a four element circular buffer |tokens| of
michael@0 359 // |Token|s. As an index for |tokens|, the member |cursor| points to the
michael@0 360 // current token.
michael@0 361 // Calls to getToken() increase |cursor| by one and return the new current
michael@0 362 // token. If a TokenStream was just created, the current token is initialized
michael@0 363 // with random data (i.e. not initialized). It is therefore important that
michael@0 364 // one of the first four member functions listed below is called first.
michael@0 365 // The circular buffer lets us go back up to two tokens from the last
michael@0 366 // scanned token. Internally, the relative number of backward steps that were
michael@0 367 // taken (via ungetToken()) after the last token was scanned is stored in
michael@0 368 // |lookahead|.
michael@0 369 //
michael@0 370 // The following table lists in which situations it is safe to call each listed
michael@0 371 // function. No checks are made by the functions in non-debug builds.
michael@0 372 //
michael@0 373 // Function Name | Precondition; changes to |lookahead|
michael@0 374 // ------------------+---------------------------------------------------------
michael@0 375 // getToken | none; if |lookahead > 0| then |lookahead--|
michael@0 376 // peekToken | none; if |lookahead == 0| then |lookahead == 1|
michael@0 377 // peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
michael@0 378 // matchToken | none; if |lookahead > 0| and the match succeeds then
michael@0 379 // | |lookahead--|
michael@0 380 // consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
michael@0 381 // ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
michael@0 382 //
michael@0 383 // The behavior of the token scanning process (see getTokenInternal()) can be
michael@0 384 // modified by calling one of the first four above listed member functions with
michael@0 385 // an optional argument of type Modifier. However, the modifier will be
michael@0 386 // ignored unless |lookahead == 0| holds. Due to constraints of the grammar,
michael@0 387 // this turns out not to be a problem in practice. See the
michael@0 388 // mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
michael@0 389 // for more details:
michael@0 390 // https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E
michael@0 391 //
michael@0 392 // The methods seek() and tell() allow rescanning from a previously visited
michael@0 393 // location of the buffer.
michael@0 394 //
michael@0 395 class MOZ_STACK_CLASS TokenStream
michael@0 396 {
michael@0 397 // Unicode separators that are treated as line terminators, in addition to \n, \r.
enum {
    LINE_SEPARATOR = 0x2028,    // U+2028 LINE SEPARATOR
    PARA_SEPARATOR = 0x2029     // U+2029 PARAGRAPH SEPARATOR
};
michael@0 402
// Capacity of the circular token buffer. Only 1 current + 2 lookahead
// tokens are needed; the count is rounded up to a power of 2 so indices
// can wrap with |ntokensMask| rather than a divmod by 3.
static const size_t ntokens = 4;
// Largest number of tokens that may be pushed back at any one time.
static const unsigned maxLookahead = 2;
// Mask applied to |cursor| arithmetic to wrap around the buffer.
static const unsigned ntokensMask = ntokens - 1;
michael@0 407
michael@0 408 public:
michael@0 409 typedef Vector<jschar, 32> CharBuffer;
michael@0 410
michael@0 411 TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
michael@0 412 const jschar *base, size_t length, StrictModeGetter *smg);
michael@0 413
michael@0 414 ~TokenStream();
michael@0 415
michael@0 416 // Accessors.
michael@0 417 const Token &currentToken() const { return tokens[cursor]; }
michael@0 418 bool isCurrentTokenType(TokenKind type) const {
michael@0 419 return currentToken().type == type;
michael@0 420 }
michael@0 421 const CharBuffer &getTokenbuf() const { return tokenbuf; }
michael@0 422 const char *getFilename() const { return filename; }
michael@0 423 unsigned getLineno() const { return lineno; }
michael@0 424 unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }
michael@0 425 JSPrincipals *getOriginPrincipals() const { return originPrincipals; }
michael@0 426 JSVersion versionNumber() const { return VersionNumber(options().version); }
michael@0 427 JSVersion versionWithFlags() const { return options().version; }
michael@0 428
michael@0 429 PropertyName *currentName() const {
michael@0 430 if (isCurrentTokenType(TOK_YIELD))
michael@0 431 return cx->names().yield;
michael@0 432 JS_ASSERT(isCurrentTokenType(TOK_NAME));
michael@0 433 return currentToken().name();
michael@0 434 }
michael@0 435
michael@0 436 bool isCurrentTokenAssignment() const {
michael@0 437 return TokenKindIsAssignment(currentToken().type);
michael@0 438 }
michael@0 439
michael@0 440 // Flag methods.
michael@0 441 bool isEOF() const { return flags.isEOF; }
michael@0 442 bool sawOctalEscape() const { return flags.sawOctalEscape; }
michael@0 443 bool hadError() const { return flags.hadError; }
michael@0 444
michael@0 445 // TokenStream-specific error reporters.
michael@0 446 bool reportError(unsigned errorNumber, ...);
michael@0 447 bool reportWarning(unsigned errorNumber, ...);
michael@0 448
michael@0 449 static const uint32_t NoOffset = UINT32_MAX;
michael@0 450
michael@0 451 // General-purpose error reporters. You should avoid calling these
michael@0 452 // directly, and instead use the more succinct alternatives (e.g.
michael@0 453 // reportError()) in TokenStream, Parser, and BytecodeEmitter.
michael@0 454 bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
michael@0 455 va_list args);
michael@0 456 bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
michael@0 457 va_list args);
michael@0 458 bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,
michael@0 459 va_list args);
michael@0 460
michael@0 461 // asm.js reporter
michael@0 462 void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
michael@0 463
michael@0 464 private:
michael@0 465 // These are private because they should only be called by the tokenizer
michael@0 466 // while tokenizing not by, for example, BytecodeEmitter.
michael@0 467 bool reportStrictModeError(unsigned errorNumber, ...);
michael@0 468 bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }
michael@0 469
michael@0 470 void onError();
michael@0 471 static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb);
michael@0 472 bool putIdentInTokenbuf(const jschar *identStart);
michael@0 473
// Scanner state bits, each packed into a single-bit field. All bits start
// out cleared.
struct Flags
{
    bool isEOF:1;               // Hit end of file.
    bool isDirtyLine:1;         // Non-whitespace since start of line.
    bool sawOctalEscape:1;      // Saw an octal character escape.
    bool hadError:1;            // Returned TOK_ERROR from getToken.

    Flags()
      : isEOF(false),
        isDirtyLine(false),
        sawOctalEscape(false),
        hadError(false)
    {}
};
michael@0 485
michael@0 486 public:
michael@0 487 // Sometimes the parser needs to modify how tokens are created.
enum Modifier
{
    // Normal operation.
    None,

    // Looking for an operand, not an operator. In practice, this means
    // that when '/' is seen, we look for a regexp instead of just
    // returning TOK_DIV.
    Operand,

    // Treat keywords as names by returning TOK_NAME.
    KeywordIsName,
};
michael@0 497
michael@0 498 // Get the next token from the stream, make it the current token, and
michael@0 499 // return its kind.
michael@0 500 TokenKind getToken(Modifier modifier = None) {
michael@0 501 // Check for a pushed-back token resulting from mismatching lookahead.
michael@0 502 if (lookahead != 0) {
michael@0 503 lookahead--;
michael@0 504 cursor = (cursor + 1) & ntokensMask;
michael@0 505 TokenKind tt = currentToken().type;
michael@0 506 JS_ASSERT(tt != TOK_EOL);
michael@0 507 return tt;
michael@0 508 }
michael@0 509
michael@0 510 return getTokenInternal(modifier);
michael@0 511 }
michael@0 512
michael@0 513 // Push the last scanned token back into the stream.
michael@0 514 void ungetToken() {
michael@0 515 JS_ASSERT(lookahead < maxLookahead);
michael@0 516 lookahead++;
michael@0 517 cursor = (cursor - 1) & ntokensMask;
michael@0 518 }
michael@0 519
michael@0 520 TokenKind peekToken(Modifier modifier = None) {
michael@0 521 if (lookahead != 0)
michael@0 522 return tokens[(cursor + 1) & ntokensMask].type;
michael@0 523 TokenKind tt = getTokenInternal(modifier);
michael@0 524 ungetToken();
michael@0 525 return tt;
michael@0 526 }
michael@0 527
michael@0 528 TokenPos peekTokenPos(Modifier modifier = None) {
michael@0 529 if (lookahead != 0)
michael@0 530 return tokens[(cursor + 1) & ntokensMask].pos;
michael@0 531 getTokenInternal(modifier);
michael@0 532 ungetToken();
michael@0 533 JS_ASSERT(lookahead != 0);
michael@0 534 return tokens[(cursor + 1) & ntokensMask].pos;
michael@0 535 }
michael@0 536
michael@0 537 // This is like peekToken(), with one exception: if there is an EOL
michael@0 538 // between the end of the current token and the start of the next token, it
michael@0 539 // returns TOK_EOL. In that case, no token with TOK_EOL is actually
michael@0 540 // created, just a TOK_EOL TokenKind is returned, and currentToken()
michael@0 541 // shouldn't be consulted. (This is the only place TOK_EOL is produced.)
michael@0 542 MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) {
michael@0 543 const Token &curr = currentToken();
michael@0 544
michael@0 545 // If lookahead != 0, we have scanned ahead at least one token, and
michael@0 546 // |lineno| is the line that the furthest-scanned token ends on. If
michael@0 547 // it's the same as the line that the current token ends on, that's a
michael@0 548 // stronger condition than what we are looking for, and we don't need
michael@0 549 // to return TOK_EOL.
michael@0 550 if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno))
michael@0 551 return tokens[(cursor + 1) & ntokensMask].type;
michael@0 552
michael@0 553 // The above check misses two cases where we don't have to return
michael@0 554 // TOK_EOL.
michael@0 555 // - The next token starts on the same line, but is a multi-line token.
michael@0 556 // - The next token starts on the same line, but lookahead==2 and there
michael@0 557 // is a newline between the next token and the one after that.
michael@0 558 // The following test is somewhat expensive but gets these cases (and
michael@0 559 // all others) right.
michael@0 560 (void)getToken(modifier);
michael@0 561 const Token &next = currentToken();
michael@0 562 ungetToken();
michael@0 563 return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)
michael@0 564 ? next.type
michael@0 565 : TOK_EOL;
michael@0 566 }
michael@0 567
michael@0 568 // Get the next token from the stream if its kind is |tt|.
michael@0 569 bool matchToken(TokenKind tt, Modifier modifier = None) {
michael@0 570 if (getToken(modifier) == tt)
michael@0 571 return true;
michael@0 572 ungetToken();
michael@0 573 return false;
michael@0 574 }
michael@0 575
michael@0 576 void consumeKnownToken(TokenKind tt) {
michael@0 577 JS_ALWAYS_TRUE(matchToken(tt));
michael@0 578 }
michael@0 579
michael@0 580 bool matchContextualKeyword(Handle<PropertyName*> keyword) {
michael@0 581 if (getToken() == TOK_NAME && currentToken().name() == keyword)
michael@0 582 return true;
michael@0 583 ungetToken();
michael@0 584 return false;
michael@0 585 }
michael@0 586
michael@0 587 bool nextTokenEndsExpr() {
michael@0 588 return isExprEnding[peekToken()];
michael@0 589 }
michael@0 590
michael@0 591 class MOZ_STACK_CLASS Position {
michael@0 592 public:
michael@0 593 // The Token fields may contain pointers to atoms, so for correct
michael@0 594 // rooting we must ensure collection of atoms is disabled while objects
michael@0 595 // of this class are live. Do this by requiring a dummy AutoKeepAtoms
michael@0 596 // reference in the constructor.
michael@0 597 //
michael@0 598 // This class is explicity ignored by the analysis, so don't add any
michael@0 599 // more pointers to GC things here!
michael@0 600 Position(AutoKeepAtoms&) { }
michael@0 601 private:
michael@0 602 Position(const Position&) MOZ_DELETE;
michael@0 603 friend class TokenStream;
michael@0 604 const jschar *buf;
michael@0 605 Flags flags;
michael@0 606 unsigned lineno;
michael@0 607 const jschar *linebase;
michael@0 608 const jschar *prevLinebase;
michael@0 609 Token currentToken;
michael@0 610 unsigned lookahead;
michael@0 611 Token lookaheadTokens[maxLookahead];
michael@0 612 };
michael@0 613
michael@0 614 void advance(size_t position);
michael@0 615 void tell(Position *);
michael@0 616 void seek(const Position &pos);
michael@0 617 bool seek(const Position &pos, const TokenStream &other);
michael@0 618
michael@0 619 size_t positionToOffset(const Position &pos) const {
michael@0 620 return pos.buf - userbuf.base();
michael@0 621 }
michael@0 622
michael@0 623 const jschar *rawBase() const {
michael@0 624 return userbuf.base();
michael@0 625 }
michael@0 626
michael@0 627 const jschar *rawLimit() const {
michael@0 628 return userbuf.limit();
michael@0 629 }
michael@0 630
michael@0 631 bool hasDisplayURL() const {
michael@0 632 return displayURL_ != nullptr;
michael@0 633 }
michael@0 634
michael@0 635 jschar *displayURL() {
michael@0 636 return displayURL_;
michael@0 637 }
michael@0 638
michael@0 639 bool hasSourceMapURL() const {
michael@0 640 return sourceMapURL_ != nullptr;
michael@0 641 }
michael@0 642
michael@0 643 jschar *sourceMapURL() {
michael@0 644 return sourceMapURL_;
michael@0 645 }
michael@0 646
michael@0 647 // If the name at s[0:length] is not a keyword in this version, return
michael@0 648 // true with *ttp unchanged.
michael@0 649 //
michael@0 650 // If it is a reserved word in this version and strictness mode, and thus
michael@0 651 // can't be present in correct code, report a SyntaxError and return false.
michael@0 652 //
michael@0 653 // If it is a keyword, like "if", the behavior depends on ttp. If ttp is
michael@0 654 // null, report a SyntaxError ("if is a reserved identifier") and return
michael@0 655 // false. If ttp is non-null, return true with the keyword's TokenKind in
michael@0 656 // *ttp.
michael@0 657 bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp);
michael@0 658
michael@0 659 // This class maps a userbuf offset (which is 0-indexed) to a line number
michael@0 660 // (which is 1-indexed) and a column index (which is 0-indexed).
michael@0 661 class SourceCoords
michael@0 662 {
michael@0 663 // For a given buffer holding source code, |lineStartOffsets_| has one
michael@0 664 // element per line of source code, plus one sentinel element. Each
michael@0 665 // non-sentinel element holds the buffer offset for the start of the
michael@0 666 // corresponding line of source code. For this example script:
michael@0 667 //
michael@0 668 // 1 // xyz [line starts at offset 0]
michael@0 669 // 2 var x; [line starts at offset 7]
michael@0 670 // 3 [line starts at offset 14]
michael@0 671 // 4 var y; [line starts at offset 15]
michael@0 672 //
michael@0 673 // |lineStartOffsets_| is:
michael@0 674 //
michael@0 675 // [0, 7, 14, 15, MAX_PTR]
michael@0 676 //
michael@0 677 // To convert a "line number" to a "line index" (i.e. an index into
michael@0 678 // |lineStartOffsets_|), subtract |initialLineNum_|. E.g. line 3's
michael@0 679 // line index is (3 - initialLineNum_), which is 2. Therefore
michael@0 680 // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
michael@0 681 // which is 14. (Note that |initialLineNum_| is often 1, but not
michael@0 682 // always.)
michael@0 683 //
michael@0 684 // The first element is always 0, and the last element is always the
michael@0 685 // MAX_PTR sentinel.
michael@0 686 //
michael@0 687 // offset-to-line/column lookups are O(log n) in the worst case (binary
michael@0 688 // search), but in practice they're heavily clustered and we do better
michael@0 689 // than that by using the previous lookup's result (lastLineIndex_) as
michael@0 690 // a starting point.
michael@0 691 //
michael@0 692 // Checking if an offset lies within a particular line number
michael@0 693 // (isOnThisLine()) is O(1).
michael@0 694 //
michael@0 695 Vector<uint32_t, 128> lineStartOffsets_;
michael@0 696 uint32_t initialLineNum_;
michael@0 697
michael@0 698 // This is mutable because it's modified on every search, but that fact
michael@0 699 // isn't visible outside this class.
michael@0 700 mutable uint32_t lastLineIndex_;
michael@0 701
michael@0 702 uint32_t lineIndexOf(uint32_t offset) const;
michael@0 703
michael@0 704 static const uint32_t MAX_PTR = UINT32_MAX;
michael@0 705
michael@0 706 uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
michael@0 707 uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; }
michael@0 708
michael@0 709 public:
michael@0 710 SourceCoords(ExclusiveContext *cx, uint32_t ln);
michael@0 711
michael@0 712 void add(uint32_t lineNum, uint32_t lineStartOffset);
michael@0 713 bool fill(const SourceCoords &other);
michael@0 714
michael@0 715 bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {
michael@0 716 uint32_t lineIndex = lineNumToIndex(lineNum);
michael@0 717 JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length()); // +1 due to sentinel
michael@0 718 return lineStartOffsets_[lineIndex] <= offset &&
michael@0 719 offset < lineStartOffsets_[lineIndex + 1];
michael@0 720 }
michael@0 721
michael@0 722 uint32_t lineNum(uint32_t offset) const;
michael@0 723 uint32_t columnIndex(uint32_t offset) const;
michael@0 724 void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;
michael@0 725 };
michael@0 726
michael@0 727 SourceCoords srcCoords;
michael@0 728
michael@0 729 JSAtomState &names() const {
michael@0 730 return cx->names();
michael@0 731 }
michael@0 732
michael@0 733 ExclusiveContext *context() const {
michael@0 734 return cx;
michael@0 735 }
michael@0 736
michael@0 737 const ReadOnlyCompileOptions &options() const {
michael@0 738 return options_;
michael@0 739 }
michael@0 740
michael@0 741 private:
michael@0 742 // This is the low-level interface to the JS source code buffer. It just
michael@0 743 // gets raw chars, basically. TokenStreams functions are layered on top
michael@0 744 // and do some extra stuff like converting all EOL sequences to '\n',
michael@0 745 // tracking the line number, and setting |flags.isEOF|. (The "raw" in "raw
michael@0 746 // chars" refers to the lack of EOL sequence normalization.)
michael@0 747 class TokenBuf {
michael@0 748 public:
michael@0 749 TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length)
michael@0 750 : base_(buf), limit_(buf + length), ptr(buf)
michael@0 751 { }
michael@0 752
michael@0 753 bool hasRawChars() const {
michael@0 754 return ptr < limit_;
michael@0 755 }
michael@0 756
michael@0 757 bool atStart() const {
michael@0 758 return ptr == base_;
michael@0 759 }
michael@0 760
michael@0 761 const jschar *base() const {
michael@0 762 return base_;
michael@0 763 }
michael@0 764
michael@0 765 const jschar *limit() const {
michael@0 766 return limit_;
michael@0 767 }
michael@0 768
michael@0 769 jschar getRawChar() {
michael@0 770 return *ptr++; // this will nullptr-crash if poisoned
michael@0 771 }
michael@0 772
michael@0 773 jschar peekRawChar() const {
michael@0 774 return *ptr; // this will nullptr-crash if poisoned
michael@0 775 }
michael@0 776
michael@0 777 bool matchRawChar(jschar c) {
michael@0 778 if (*ptr == c) { // this will nullptr-crash if poisoned
michael@0 779 ptr++;
michael@0 780 return true;
michael@0 781 }
michael@0 782 return false;
michael@0 783 }
michael@0 784
michael@0 785 bool matchRawCharBackwards(jschar c) {
michael@0 786 JS_ASSERT(ptr); // make sure it hasn't been poisoned
michael@0 787 if (*(ptr - 1) == c) {
michael@0 788 ptr--;
michael@0 789 return true;
michael@0 790 }
michael@0 791 return false;
michael@0 792 }
michael@0 793
michael@0 794 void ungetRawChar() {
michael@0 795 JS_ASSERT(ptr); // make sure it hasn't been poisoned
michael@0 796 ptr--;
michael@0 797 }
michael@0 798
michael@0 799 const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {
michael@0 800 JS_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned
michael@0 801 return ptr;
michael@0 802 }
michael@0 803
michael@0 804 // Use this with caution!
michael@0 805 void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {
michael@0 806 JS_ASSERT_IF(!allowPoisoned, a);
michael@0 807 ptr = a;
michael@0 808 }
michael@0 809
michael@0 810 #ifdef DEBUG
michael@0 811 // Poison the TokenBuf so it cannot be accessed again.
michael@0 812 void poison() {
michael@0 813 ptr = nullptr;
michael@0 814 }
michael@0 815 #endif
michael@0 816
michael@0 817 static bool isRawEOLChar(int32_t c) {
michael@0 818 return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;
michael@0 819 }
michael@0 820
michael@0 821 // Finds the next EOL, but stops once 'max' jschars have been scanned
michael@0 822 // (*including* the starting jschar).
michael@0 823 const jschar *findEOLMax(const jschar *p, size_t max);
michael@0 824
michael@0 825 private:
michael@0 826 const jschar *base_; // base of buffer
michael@0 827 const jschar *limit_; // limit for quick bounds check
michael@0 828 const jschar *ptr; // next char to get
michael@0 829 };
michael@0 830
michael@0 831 TokenKind getTokenInternal(Modifier modifier);
michael@0 832
michael@0 833 int32_t getChar();
michael@0 834 int32_t getCharIgnoreEOL();
michael@0 835 void ungetChar(int32_t c);
michael@0 836 void ungetCharIgnoreEOL(int32_t c);
michael@0 837 Token *newToken(ptrdiff_t adjust);
michael@0 838 bool peekUnicodeEscape(int32_t *c);
michael@0 839 bool matchUnicodeEscapeIdStart(int32_t *c);
michael@0 840 bool matchUnicodeEscapeIdent(int32_t *c);
michael@0 841 bool peekChars(int n, jschar *cp);
michael@0 842
michael@0 843 bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
michael@0 844 bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
michael@0 845 const char *directive, int directiveLength,
michael@0 846 const char *errorMsgPragma, jschar **destination);
michael@0 847 bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
michael@0 848 bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);
michael@0 849
michael@0 850 // |expect| cannot be an EOL char.
michael@0 851 bool matchChar(int32_t expect) {
michael@0 852 MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
michael@0 853 return MOZ_LIKELY(userbuf.hasRawChars()) &&
michael@0 854 userbuf.matchRawChar(expect);
michael@0 855 }
michael@0 856
michael@0 857 void consumeKnownChar(int32_t expect) {
michael@0 858 mozilla::DebugOnly<int32_t> c = getChar();
michael@0 859 JS_ASSERT(c == expect);
michael@0 860 }
michael@0 861
michael@0 862 int32_t peekChar() {
michael@0 863 int32_t c = getChar();
michael@0 864 ungetChar(c);
michael@0 865 return c;
michael@0 866 }
michael@0 867
michael@0 868 void skipChars(int n) {
michael@0 869 while (--n >= 0)
michael@0 870 getChar();
michael@0 871 }
michael@0 872
// End-of-line bookkeeping helpers; declared here, defined elsewhere.
void updateLineInfoForEOL();
void updateFlagsForEOL();
michael@0 875
michael@0 876 // Options used for parsing/tokenizing.
michael@0 877 const ReadOnlyCompileOptions &options_;
michael@0 878
michael@0 879 Token tokens[ntokens]; // circular token buffer
michael@0 880 unsigned cursor; // index of last parsed token
michael@0 881 unsigned lookahead; // count of lookahead tokens
michael@0 882 unsigned lineno; // current line number
michael@0 883 Flags flags; // flags -- see above
michael@0 884 const jschar *linebase; // start of current line; points into userbuf
michael@0 885 const jschar *prevLinebase; // start of previous line; nullptr if on the first line
michael@0 886 TokenBuf userbuf; // user input buffer
michael@0 887 const char *filename; // input filename or null
michael@0 888 jschar *displayURL_; // the user's requested source URL or null
michael@0 889 jschar *sourceMapURL_; // source map's filename or null
michael@0 890 CharBuffer tokenbuf; // current token string buffer
michael@0 891 bool maybeEOL[256]; // probabilistic EOL lookup table
michael@0 892 bool maybeStrSpecial[256]; // speeds up string scanning
michael@0 893 uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
michael@0 894 ExclusiveContext *const cx;
michael@0 895 JSPrincipals *const originPrincipals;
michael@0 896 StrictModeGetter *strictModeGetter; // used to test for strict mode
michael@0 897 };
michael@0 898
// JSREPORT_UC borrows one of the JSREPORT_* bits (see jsapi.h). It marks an
// error report whose message arguments are of type const jschar* rather
// than const char*.
#define JSREPORT_UC 0x100
michael@0 902
michael@0 903 } // namespace frontend
michael@0 904 } // namespace js
michael@0 905
michael@0 906 extern JS_FRIEND_API(int)
michael@0 907 js_fgets(char *buf, int size, FILE *file);
michael@0 908
#ifdef DEBUG
// Available in DEBUG builds only: returns a C string for the token kind
// |tt|.
extern const char *TokenKindToString(js::frontend::TokenKind tt);
#endif
michael@0 913
michael@0 914 #endif /* frontend_TokenStream_h */

mercurial