Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
michael@0 | 2 | * vim: set ts=8 sts=4 et sw=4 tw=99: |
michael@0 | 3 | * This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 4 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 5 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 6 | |
michael@0 | 7 | #ifndef frontend_TokenStream_h |
michael@0 | 8 | #define frontend_TokenStream_h |
michael@0 | 9 | |
michael@0 | 10 | // JS lexical scanner interface. |
michael@0 | 11 | |
michael@0 | 12 | #include "mozilla/DebugOnly.h" |
michael@0 | 13 | #include "mozilla/PodOperations.h" |
michael@0 | 14 | |
michael@0 | 15 | #include <stdarg.h> |
michael@0 | 16 | #include <stddef.h> |
michael@0 | 17 | #include <stdio.h> |
michael@0 | 18 | |
michael@0 | 19 | #include "jscntxt.h" |
michael@0 | 20 | #include "jspubtd.h" |
michael@0 | 21 | |
michael@0 | 22 | #include "js/Vector.h" |
michael@0 | 23 | #include "vm/RegExpObject.h" |
michael@0 | 24 | |
michael@0 | 25 | namespace js { |
michael@0 | 26 | namespace frontend { |
michael@0 | 27 | |
// Range markers such as TOK_BINOP_FIRST and TOK_BINOP_LAST alias real
// enumerators; the TokenKindIs*() predicates defined after this enum test
// membership in those contiguous ranges with two comparisons.
michael@0 | 28 | // Values of this type are used to index into arrays such as isExprEnding[], |
michael@0 | 29 | // so the first value must be zero. |
michael@0 | 30 | enum TokenKind { |
michael@0 | 31 | TOK_ERROR = 0, // well-known as the only code < EOF |
michael@0 | 32 | TOK_EOF, // end of file |
michael@0 | 33 | TOK_EOL, // end of line; only returned by peekTokenSameLine() |
michael@0 | 34 | TOK_SEMI, // semicolon |
michael@0 | 35 | TOK_COMMA, // comma operator |
michael@0 | 36 | TOK_HOOK, TOK_COLON, // conditional (?:) |
michael@0 | 37 | TOK_INC, TOK_DEC, // increment/decrement (++ --) |
michael@0 | 38 | TOK_DOT, // member operator (.) |
michael@0 | 39 | TOK_TRIPLEDOT, // for rest arguments (...) |
michael@0 | 40 | TOK_LB, TOK_RB, // left and right brackets |
michael@0 | 41 | TOK_LC, TOK_RC, // left and right curlies (braces) |
michael@0 | 42 | TOK_LP, TOK_RP, // left and right parentheses |
michael@0 | 43 | TOK_NAME, // identifier |
michael@0 | 44 | TOK_NUMBER, // numeric constant |
michael@0 | 45 | TOK_STRING, // string constant |
michael@0 | 46 | TOK_REGEXP, // RegExp constant |
michael@0 | 47 | TOK_TRUE, // true |
michael@0 | 48 | TOK_FALSE, // false |
michael@0 | 49 | TOK_NULL, // null |
michael@0 | 50 | TOK_THIS, // this |
michael@0 | 51 | TOK_FUNCTION, // function keyword |
michael@0 | 52 | TOK_IF, // if keyword |
michael@0 | 53 | TOK_ELSE, // else keyword |
michael@0 | 54 | TOK_SWITCH, // switch keyword |
michael@0 | 55 | TOK_CASE, // case keyword |
michael@0 | 56 | TOK_DEFAULT, // default keyword |
michael@0 | 57 | TOK_WHILE, // while keyword |
michael@0 | 58 | TOK_DO, // do keyword |
michael@0 | 59 | TOK_FOR, // for keyword |
michael@0 | 60 | TOK_BREAK, // break keyword |
michael@0 | 61 | TOK_CONTINUE, // continue keyword |
michael@0 | 62 | TOK_VAR, // var keyword |
michael@0 | 63 | TOK_CONST, // const keyword |
michael@0 | 64 | TOK_WITH, // with keyword |
michael@0 | 65 | TOK_RETURN, // return keyword |
michael@0 | 66 | TOK_NEW, // new keyword |
michael@0 | 67 | TOK_DELETE, // delete keyword |
michael@0 | 68 | TOK_TRY, // try keyword |
michael@0 | 69 | TOK_CATCH, // catch keyword |
michael@0 | 70 | TOK_FINALLY, // finally keyword |
michael@0 | 71 | TOK_THROW, // throw keyword |
michael@0 | 72 | TOK_DEBUGGER, // debugger keyword |
michael@0 | 73 | TOK_YIELD, // yield from generator function |
michael@0 | 74 | TOK_LET, // let keyword |
michael@0 | 75 | TOK_EXPORT, // export keyword |
michael@0 | 76 | TOK_IMPORT, // import keyword |
michael@0 | 77 | TOK_RESERVED, // reserved keywords |
michael@0 | 78 | TOK_STRICT_RESERVED, // reserved keywords in strict mode |
michael@0 | 79 | |
michael@0 | 80 | // The following token types occupy contiguous ranges to enable easy |
michael@0 | 81 | // range-testing. |
michael@0 | 82 | |
michael@0 | 83 | // Binary operator tokens, TOK_OR through TOK_MOD. These must be in the same |
michael@0 | 84 | // order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h. |
michael@0 | 85 | TOK_OR, // logical or (||) |
michael@0 | 86 | TOK_BINOP_FIRST = TOK_OR, |
michael@0 | 87 | TOK_AND, // logical and (&&) |
michael@0 | 88 | TOK_BITOR, // bitwise-or (|) |
michael@0 | 89 | TOK_BITXOR, // bitwise-xor (^) |
michael@0 | 90 | TOK_BITAND, // bitwise-and (&) |
michael@0 | 91 | |
michael@0 | 92 | // Equality operation tokens, per TokenKindIsEquality. |
michael@0 | 93 | TOK_STRICTEQ, |
michael@0 | 94 | TOK_EQUALITY_START = TOK_STRICTEQ, |
michael@0 | 95 | TOK_EQ, |
michael@0 | 96 | TOK_STRICTNE, |
michael@0 | 97 | TOK_NE, |
michael@0 | 98 | TOK_EQUALITY_LAST = TOK_NE, |
michael@0 | 99 | |
michael@0 | 100 | // Relational ops (< <= > >=), per TokenKindIsRelational. |
michael@0 | 101 | TOK_LT, |
michael@0 | 102 | TOK_RELOP_START = TOK_LT, |
michael@0 | 103 | TOK_LE, |
michael@0 | 104 | TOK_GT, |
michael@0 | 105 | TOK_GE, |
michael@0 | 106 | TOK_RELOP_LAST = TOK_GE, |
michael@0 | 107 | |
michael@0 | 108 | TOK_INSTANCEOF, // |instanceof| keyword |
michael@0 | 109 | TOK_IN, // |in| keyword |
michael@0 | 110 | |
michael@0 | 111 | // Shift ops (<< >> >>>), per TokenKindIsShift. |
michael@0 | 112 | TOK_LSH, |
michael@0 | 113 | TOK_SHIFTOP_START = TOK_LSH, |
michael@0 | 114 | TOK_RSH, |
michael@0 | 115 | TOK_URSH, |
michael@0 | 116 | TOK_SHIFTOP_LAST = TOK_URSH, |
michael@0 | 117 | |
michael@0 | 118 | TOK_ADD, |
michael@0 | 119 | TOK_SUB, |
michael@0 | 120 | TOK_MUL, |
michael@0 | 121 | TOK_DIV, |
michael@0 | 122 | TOK_MOD, |
michael@0 | 123 | TOK_BINOP_LAST = TOK_MOD, |
michael@0 | 124 | |
michael@0 | 125 | // Unary operation tokens. |
michael@0 | 126 | TOK_TYPEOF, |
michael@0 | 127 | TOK_VOID, |
michael@0 | 128 | TOK_NOT, |
michael@0 | 129 | TOK_BITNOT, |
michael@0 | 130 | |
michael@0 | 131 | TOK_ARROW, // function arrow (=>) |
michael@0 | 132 | |
michael@0 | 133 | // Assignment ops (= += -= etc.), per TokenKindIsAssignment |
michael@0 | 134 | TOK_ASSIGN, |
michael@0 | 135 | TOK_ASSIGNMENT_START = TOK_ASSIGN, |
michael@0 | 136 | TOK_ADDASSIGN, |
michael@0 | 137 | TOK_SUBASSIGN, |
michael@0 | 138 | TOK_BITORASSIGN, |
michael@0 | 139 | TOK_BITXORASSIGN, |
michael@0 | 140 | TOK_BITANDASSIGN, |
michael@0 | 141 | TOK_LSHASSIGN, |
michael@0 | 142 | TOK_RSHASSIGN, |
michael@0 | 143 | TOK_URSHASSIGN, |
michael@0 | 144 | TOK_MULASSIGN, |
michael@0 | 145 | TOK_DIVASSIGN, |
michael@0 | 146 | TOK_MODASSIGN, |
michael@0 | 147 | TOK_ASSIGNMENT_LAST = TOK_MODASSIGN, |
michael@0 | 148 | |
michael@0 | 149 | TOK_LIMIT // domain size |
michael@0 | 150 | }; |
michael@0 | 151 | |
// Returns true iff |tt| lies in the enumerator range
// [TOK_BINOP_FIRST, TOK_BINOP_LAST], i.e. TOK_OR through TOK_MOD.
michael@0 | 152 | inline bool |
michael@0 | 153 | TokenKindIsBinaryOp(TokenKind tt) |
michael@0 | 154 | { |
michael@0 | 155 | return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST; |
michael@0 | 156 | } |
michael@0 | 157 | |
// Returns true iff |tt| lies in the enumerator range
// [TOK_EQUALITY_START, TOK_EQUALITY_LAST], i.e. TOK_STRICTEQ through TOK_NE.
michael@0 | 158 | inline bool |
michael@0 | 159 | TokenKindIsEquality(TokenKind tt) |
michael@0 | 160 | { |
michael@0 | 161 | return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST; |
michael@0 | 162 | } |
michael@0 | 163 | |
// Returns true iff |tt| lies in the enumerator range
// [TOK_RELOP_START, TOK_RELOP_LAST], i.e. TOK_LT through TOK_GE.
michael@0 | 164 | inline bool |
michael@0 | 165 | TokenKindIsRelational(TokenKind tt) |
michael@0 | 166 | { |
michael@0 | 167 | return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST; |
michael@0 | 168 | } |
michael@0 | 169 | |
// Returns true iff |tt| lies in the enumerator range
// [TOK_SHIFTOP_START, TOK_SHIFTOP_LAST], i.e. TOK_LSH through TOK_URSH.
michael@0 | 170 | inline bool |
michael@0 | 171 | TokenKindIsShift(TokenKind tt) |
michael@0 | 172 | { |
michael@0 | 173 | return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST; |
michael@0 | 174 | } |
michael@0 | 175 | |
// Returns true iff |tt| lies in the enumerator range
// [TOK_ASSIGNMENT_START, TOK_ASSIGNMENT_LAST], i.e. TOK_ASSIGN through
// TOK_MODASSIGN.
michael@0 | 176 | inline bool |
michael@0 | 177 | TokenKindIsAssignment(TokenKind tt) |
michael@0 | 178 | { |
michael@0 | 179 | return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST; |
michael@0 | 180 | } |
michael@0 | 181 | |
// Returns true iff |tt| is TOK_VAR or TOK_LET. TOK_CONST is not accepted by
// this predicate.
michael@0 | 182 | inline bool |
michael@0 | 183 | TokenKindIsDecl(TokenKind tt) |
michael@0 | 184 | { |
michael@0 | 185 | return tt == TOK_VAR || tt == TOK_LET; |
michael@0 | 186 | } |
michael@0 | 187 | |
// Source extent of a token as a pair of offsets: |begin| is the first char and
// |end| is one past the last char.
michael@0 | 188 | struct TokenPos { |
michael@0 | 189 | uint32_t begin; // Offset of the token's first char. |
michael@0 | 190 | uint32_t end; // Offset of 1 past the token's last char. |
michael@0 | 191 | |
// The default constructor leaves |begin| and |end| uninitialized.
michael@0 | 192 | TokenPos() {} |
michael@0 | 193 | TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {} |
michael@0 | 194 | |
michael@0 | 195 | // Return a TokenPos that covers left, right, and anything in between. |
// Asserts that each argument is well-formed (begin <= end) and that |left|
// ends no later than |right| begins.
michael@0 | 196 | static TokenPos box(const TokenPos &left, const TokenPos &right) { |
michael@0 | 197 | JS_ASSERT(left.begin <= left.end); |
michael@0 | 198 | JS_ASSERT(left.end <= right.begin); |
michael@0 | 199 | JS_ASSERT(right.begin <= right.end); |
michael@0 | 200 | return TokenPos(left.begin, right.end); |
michael@0 | 201 | } |
michael@0 | 202 | |
// Equality and inequality compare both |begin| and |end|.
michael@0 | 203 | bool operator==(const TokenPos& bpos) const { |
michael@0 | 204 | return begin == bpos.begin && end == bpos.end; |
michael@0 | 205 | } |
michael@0 | 206 | |
michael@0 | 207 | bool operator!=(const TokenPos& bpos) const { |
michael@0 | 208 | return begin != bpos.begin || end != bpos.end; |
michael@0 | 209 | } |
michael@0 | 210 | |
// The four ordering operators compare |begin| only; |end| is ignored, so two
// positions can be unequal under operator!= yet neither be less than the other.
michael@0 | 211 | bool operator <(const TokenPos& bpos) const { |
michael@0 | 212 | return begin < bpos.begin; |
michael@0 | 213 | } |
michael@0 | 214 | |
michael@0 | 215 | bool operator <=(const TokenPos& bpos) const { |
michael@0 | 216 | return begin <= bpos.begin; |
michael@0 | 217 | } |
michael@0 | 218 | |
michael@0 | 219 | bool operator >(const TokenPos& bpos) const { |
michael@0 | 220 | return !(*this <= bpos); |
michael@0 | 221 | } |
michael@0 | 222 | |
michael@0 | 223 | bool operator >=(const TokenPos& bpos) const { |
michael@0 | 224 | return !(*this < bpos); |
michael@0 | 225 | } |
michael@0 | 226 | |
// Returns true iff |pos| lies entirely within this range; the bounds are
// inclusive, so a position encloses itself.
michael@0 | 227 | bool encloses(const TokenPos& pos) const { |
michael@0 | 228 | return begin <= pos.begin && pos.end <= end; |
michael@0 | 229 | } |
michael@0 | 230 | }; |
michael@0 | 231 | |
// Two-valued flag whose enumerators equal false/true; Token stores it next to
// a number's value to record whether the literal contains '.'.
michael@0 | 232 | enum DecimalPoint { NoDecimal = false, HasDecimal = true }; |
michael@0 | 233 | |
// One scanned token: its kind, its source position, and a kind-dependent
// payload held in the union |u|. The union members are private; they are
// written through the set*() mutators and read through the accessors, each of
// which asserts that |type| matches the payload being touched.
michael@0 | 234 | struct Token |
michael@0 | 235 | { |
michael@0 | 236 | TokenKind type; // char value or above enumerator |
michael@0 | 237 | TokenPos pos; // token position in file |
michael@0 | 238 | union { |
michael@0 | 239 | private: |
michael@0 | 240 | friend struct Token; |
michael@0 | 241 | PropertyName *name; // non-numeric atom |
michael@0 | 242 | JSAtom *atom; // potentially-numeric atom |
michael@0 | 243 | struct { |
michael@0 | 244 | double value; // floating point number |
michael@0 | 245 | DecimalPoint decimalPoint; // literal contains '.' |
michael@0 | 246 | } number; |
michael@0 | 247 | RegExpFlag reflags; // regexp flags; use tokenbuf to access |
michael@0 | 248 | // regexp chars |
michael@0 | 249 | } u; |
michael@0 | 250 | |
michael@0 | 251 | // This constructor is necessary only for MSVC 2013 and how it compiles the |
michael@0 | 252 | // initialization of TokenStream::tokens. That field is initialized as |
michael@0 | 253 | // tokens() in the constructor init-list. This *should* zero the entire |
michael@0 | 254 | // array, then (because Token has a non-trivial constructor, because |
michael@0 | 255 | // TokenPos has a user-provided constructor) call the implicit Token |
michael@0 | 256 | // constructor on each element, which would call the TokenPos constructor |
michael@0 | 257 | // for Token::pos and do nothing. (All of which is equivalent to just |
michael@0 | 258 | // zeroing TokenStream::tokens.) But MSVC 2013 (2010/2012 don't have this |
michael@0 | 259 | // bug) doesn't zero out each element, so we need this extra constructor to |
michael@0 | 260 | // make it do the right thing. (Token is used primarily by reference or |
michael@0 | 261 | // pointer, and it's only initialized a very few places, so having a |
michael@0 | 262 | // user-defined constructor won't hurt perf.) See also bug 920318. |
// Note: only |type| and |pos| are set here; the union |u| is not written.
michael@0 | 263 | Token() |
michael@0 | 264 | : type(TOK_ERROR), |
michael@0 | 265 | pos(0, 0) |
michael@0 | 266 | { |
michael@0 | 267 | } |
michael@0 | 268 | |
michael@0 | 269 | // Mutators |
michael@0 | 270 | |
// Store the name payload; |type| must already be TOK_NAME.
michael@0 | 271 | void setName(PropertyName *name) { |
michael@0 | 272 | JS_ASSERT(type == TOK_NAME); |
michael@0 | 273 | JS_ASSERT(!IsPoisonedPtr(name)); |
michael@0 | 274 | u.name = name; |
michael@0 | 275 | } |
michael@0 | 276 | |
// Store the atom payload; |type| must already be TOK_STRING.
michael@0 | 277 | void setAtom(JSAtom *atom) { |
michael@0 | 278 | JS_ASSERT(type == TOK_STRING); |
michael@0 | 279 | JS_ASSERT(!IsPoisonedPtr(atom)); |
michael@0 | 280 | u.atom = atom; |
michael@0 | 281 | } |
michael@0 | 282 | |
// Store the regexp flags; |type| must already be TOK_REGEXP and |flags| must
// contain no bits outside AllFlags.
michael@0 | 283 | void setRegExpFlags(js::RegExpFlag flags) { |
michael@0 | 284 | JS_ASSERT(type == TOK_REGEXP); |
michael@0 | 285 | JS_ASSERT((flags & AllFlags) == flags); |
michael@0 | 286 | u.reflags = flags; |
michael@0 | 287 | } |
michael@0 | 288 | |
// Store a numeric value and its decimal-point flag; |type| must already be
// TOK_NUMBER.
michael@0 | 289 | void setNumber(double n, DecimalPoint decimalPoint) { |
michael@0 | 290 | JS_ASSERT(type == TOK_NUMBER); |
michael@0 | 291 | u.number.value = n; |
michael@0 | 292 | u.number.decimalPoint = decimalPoint; |
michael@0 | 293 | } |
michael@0 | 294 | |
michael@0 | 295 | // Type-safe accessors |
michael@0 | 296 | |
michael@0 | 297 | PropertyName *name() const { |
michael@0 | 298 | JS_ASSERT(type == TOK_NAME); |
michael@0 | 299 | return u.name->asPropertyName(); // poor-man's type verification |
michael@0 | 300 | } |
michael@0 | 301 | |
michael@0 | 302 | JSAtom *atom() const { |
michael@0 | 303 | JS_ASSERT(type == TOK_STRING); |
michael@0 | 304 | return u.atom; |
michael@0 | 305 | } |
michael@0 | 306 | |
michael@0 | 307 | js::RegExpFlag regExpFlags() const { |
michael@0 | 308 | JS_ASSERT(type == TOK_REGEXP); |
michael@0 | 309 | JS_ASSERT((u.reflags & AllFlags) == u.reflags); |
michael@0 | 310 | return u.reflags; |
michael@0 | 311 | } |
michael@0 | 312 | |
michael@0 | 313 | double number() const { |
michael@0 | 314 | JS_ASSERT(type == TOK_NUMBER); |
michael@0 | 315 | return u.number.value; |
michael@0 | 316 | } |
michael@0 | 317 | |
michael@0 | 318 | DecimalPoint decimalPoint() const { |
michael@0 | 319 | JS_ASSERT(type == TOK_NUMBER); |
michael@0 | 320 | return u.number.decimalPoint; |
michael@0 | 321 | } |
michael@0 | 322 | }; |
michael@0 | 323 | |
// Bundles a JSErrorReport with a message buffer and the argument encoding.
// The constructor zeroes |report| with PodZero, starts with a null |message|,
// and defaults |argumentsType| to ArgumentsAreUnicode. The destructor and
// throwError() are only declared here; their definitions are not in this file.
michael@0 | 324 | struct CompileError { |
michael@0 | 325 | JSErrorReport report; |
michael@0 | 326 | char *message; |
michael@0 | 327 | ErrorArgumentsType argumentsType; |
michael@0 | 328 | CompileError() |
michael@0 | 329 | : message(nullptr), argumentsType(ArgumentsAreUnicode) |
michael@0 | 330 | { |
michael@0 | 331 | mozilla::PodZero(&report); |
michael@0 | 332 | } |
michael@0 | 333 | ~CompileError(); |
michael@0 | 334 | void throwError(JSContext *cx); |
michael@0 | 335 | |
michael@0 | 336 | private: |
michael@0 | 337 | // CompileError owns raw allocated memory, so disable assignment and copying |
michael@0 | 338 | // for safety. |
michael@0 | 339 | void operator=(const CompileError &) MOZ_DELETE; |
michael@0 | 340 | CompileError(const CompileError &) MOZ_DELETE; |
michael@0 | 341 | }; |
michael@0 | 342 | |
michael@0 | 343 | // Ideally, tokenizing would be entirely independent of context. But the |
michael@0 | 344 | // strict mode flag, which is in SharedContext, affects tokenizing, and |
michael@0 | 345 | // TokenStream needs to see it. |
michael@0 | 346 | // |
michael@0 | 347 | // This class is a tiny back-channel from TokenStream to the strict mode flag |
michael@0 | 348 | // that avoids exposing the rest of SharedContext to TokenStream. |
michael@0 | 349 | // The interface is a single pure virtual, strictMode(). TokenStream::strictMode() treats a null getter as non-strict. |
michael@0 | 350 | class StrictModeGetter { |
michael@0 | 351 | public: |
michael@0 | 352 | virtual bool strictMode() = 0; |
michael@0 | 353 | }; |
michael@0 | 354 | |
michael@0 | 355 | // TokenStream is the lexical scanner for JavaScript source text. |
michael@0 | 356 | // |
michael@0 | 357 | // It takes a buffer of jschars and linearly scans it into |Token|s. |
michael@0 | 358 | // Internally the class uses a four element circular buffer |tokens| of |
michael@0 | 359 | // |Token|s. As an index for |tokens|, the member |cursor| points to the |
michael@0 | 360 | // current token. |
michael@0 | 361 | // Calls to getToken() increase |cursor| by one and return the new current |
michael@0 | 362 | // token. If a TokenStream was just created, the current token is initialized |
michael@0 | 363 | // with random data (i.e. not initialized). It is therefore important that |
michael@0 | 364 | // one of the first four member functions listed below is called first. |
michael@0 | 365 | // The circular buffer lets us go back up to two tokens from the last |
michael@0 | 366 | // scanned token. Internally, the relative number of backward steps that were |
michael@0 | 367 | // taken (via ungetToken()) after the last token was scanned is stored in |
michael@0 | 368 | // |lookahead|. |
michael@0 | 369 | // |
michael@0 | 370 | // The following table lists in which situations it is safe to call each listed |
michael@0 | 371 | // function. No checks are made by the functions in non-debug builds. |
michael@0 | 372 | // |
michael@0 | 373 | // Function Name | Precondition; changes to |lookahead| |
michael@0 | 374 | // ------------------+--------------------------------------------------------- |
michael@0 | 375 | // getToken | none; if |lookahead > 0| then |lookahead--| |
michael@0 | 376 | // peekToken | none; if |lookahead == 0| then |lookahead == 1| |
michael@0 | 377 | // peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1| |
michael@0 | 378 | // matchToken | none; if |lookahead > 0| and the match succeeds then |
michael@0 | 379 | // | |lookahead--| |
michael@0 | 380 | // consumeKnownToken | none; if |lookahead > 0| then |lookahead--| |
michael@0 | 381 | // ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++| |
michael@0 | 382 | // |
michael@0 | 383 | // The behavior of the token scanning process (see getTokenInternal()) can be |
michael@0 | 384 | // modified by calling one of the first four above listed member functions with |
michael@0 | 385 | // an optional argument of type Modifier. However, the modifier will be |
michael@0 | 386 | // ignored unless |lookahead == 0| holds. Due to constraints of the grammar, |
michael@0 | 387 | // this turns out not to be a problem in practice. See the |
michael@0 | 388 | // mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?' |
michael@0 | 389 | // for more details: |
michael@0 | 390 | // https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E |
michael@0 | 391 | // |
michael@0 | 392 | // The methods seek() and tell() allow rescanning from a previously visited |
michael@0 | 393 | // location in the buffer. |
michael@0 | 394 | // |
michael@0 | 395 | class MOZ_STACK_CLASS TokenStream |
michael@0 | 396 | { |
michael@0 | 397 | // Unicode separators (code points U+2028 and U+2029) that are treated as line terminators, in addition to \n, \r. |
michael@0 | 398 | enum { |
michael@0 | 399 | LINE_SEPARATOR = 0x2028, |
michael@0 | 400 | PARA_SEPARATOR = 0x2029 |
michael@0 | 401 | }; |
michael@0 | 402 | |
michael@0 | 403 | static const size_t ntokens = 4; // 1 current + 2 lookahead, rounded |
michael@0 | 404 | // to power of 2 to avoid divmod by 3 |
// Upper bound on |lookahead|; ungetToken() asserts it is not exceeded.
michael@0 | 405 | static const unsigned maxLookahead = 2; |
// Because ntokens is a power of two, `& ntokensMask` wraps a stepped |cursor|
// back into the buffer (see getToken() and ungetToken()).
michael@0 | 406 | static const unsigned ntokensMask = ntokens - 1; |
michael@0 | 407 | |
michael@0 | 408 | public: |
// Buffer-of-jschar type returned by getTokenbuf() and taken by atomize().
// NOTE(review): the 32 is presumably the Vector's inline capacity; confirm
// against js/Vector.h.
michael@0 | 409 | typedef Vector<jschar, 32> CharBuffer; |
michael@0 | 410 | |
michael@0 | 411 | TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options, |
michael@0 | 412 | const jschar *base, size_t length, StrictModeGetter *smg); |
michael@0 | 413 | |
michael@0 | 414 | ~TokenStream(); |
michael@0 | 415 | |
michael@0 | 416 | // Accessors. |
// Return the token that |cursor| currently selects in the circular |tokens|
// buffer. (The name had been garbled: "&curren" was decoded as an HTML entity.)
michael@0 | 417 | const Token &currentToken() const { return tokens[cursor]; } |
// Returns true iff the current token's kind equals |type|.
michael@0 | 418 | bool isCurrentTokenType(TokenKind type) const { |
michael@0 | 419 | return currentToken().type == type; |
michael@0 | 420 | } |
// Plain read-only views of the |tokenbuf|, |filename| and |lineno| members.
michael@0 | 421 | const CharBuffer &getTokenbuf() const { return tokenbuf; } |
michael@0 | 422 | const char *getFilename() const { return filename; } |
michael@0 | 423 | unsigned getLineno() const { return lineno; } |
// Computed as the distance from |linebase| to the address of the next raw
// char in |userbuf|, minus one; the pointer difference is returned as unsigned.
michael@0 | 424 | unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; } |
// getOriginPrincipals() returns the |originPrincipals| member as-is.
// versionNumber() passes options().version through VersionNumber(), whereas
// versionWithFlags() returns options().version unmodified.
michael@0 | 425 | JSPrincipals *getOriginPrincipals() const { return originPrincipals; } |
michael@0 | 426 | JSVersion versionNumber() const { return VersionNumber(options().version); } |
michael@0 | 427 | JSVersion versionWithFlags() const { return options().version; } |
michael@0 | 428 | |
// Returns the name carried by the current token. A TOK_YIELD token maps to
// cx->names().yield; otherwise the current token must be TOK_NAME (asserted)
// and its stored name is returned.
michael@0 | 429 | PropertyName *currentName() const { |
michael@0 | 430 | if (isCurrentTokenType(TOK_YIELD)) |
michael@0 | 431 | return cx->names().yield; |
michael@0 | 432 | JS_ASSERT(isCurrentTokenType(TOK_NAME)); |
michael@0 | 433 | return currentToken().name(); |
michael@0 | 434 | } |
michael@0 | 435 | |
// Returns true iff the current token's kind satisfies TokenKindIsAssignment().
michael@0 | 436 | bool isCurrentTokenAssignment() const { |
michael@0 | 437 | return TokenKindIsAssignment(currentToken().type); |
michael@0 | 438 | } |
michael@0 | 439 | |
michael@0 | 440 | // Flag methods: each reports the like-named bit of |flags|. |
michael@0 | 441 | bool isEOF() const { return flags.isEOF; } |
michael@0 | 442 | bool sawOctalEscape() const { return flags.sawOctalEscape; } |
michael@0 | 443 | bool hadError() const { return flags.hadError; } |
michael@0 | 444 | |
michael@0 | 445 | // TokenStream-specific error reporters. |
michael@0 | 446 | bool reportError(unsigned errorNumber, ...); |
michael@0 | 447 | bool reportWarning(unsigned errorNumber, ...); |
michael@0 | 448 | |
michael@0 | 449 | static const uint32_t NoOffset = UINT32_MAX; |
michael@0 | 450 | |
michael@0 | 451 | // General-purpose error reporters. You should avoid calling these |
michael@0 | 452 | // directly, and instead use the more succinct alternatives (e.g. |
michael@0 | 453 | // reportError()) in TokenStream, Parser, and BytecodeEmitter. |
michael@0 | 454 | bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, |
michael@0 | 455 | va_list args); |
michael@0 | 456 | bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, |
michael@0 | 457 | va_list args); |
michael@0 | 458 | bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, |
michael@0 | 459 | va_list args); |
michael@0 | 460 | |
michael@0 | 461 | // asm.js reporter |
michael@0 | 462 | void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...); |
michael@0 | 463 | |
michael@0 | 464 | private: |
michael@0 | 465 | // These are private because they should only be called by the tokenizer |
michael@0 | 466 | // while tokenizing, not by, for example, BytecodeEmitter. |
michael@0 | 467 | bool reportStrictModeError(unsigned errorNumber, ...); |
// Returns false when |strictModeGetter| is null; otherwise forwards to the
// getter's strictMode().
michael@0 | 468 | bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); } |
michael@0 | 469 | |
michael@0 | 470 | void onError(); |
michael@0 | 471 | static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb); |
michael@0 | 472 | bool putIdentInTokenbuf(const jschar *identStart); |
michael@0 | 473 | |
// Scanner status bits, each declared as a one-bit bool bitfield. The
// constructor value-initializes all four, so every flag starts out false.
michael@0 | 474 | struct Flags |
michael@0 | 475 | { |
michael@0 | 476 | bool isEOF:1; // Hit end of file. |
michael@0 | 477 | bool isDirtyLine:1; // Non-whitespace since start of line. |
michael@0 | 478 | bool sawOctalEscape:1; // Saw an octal character escape. |
michael@0 | 479 | bool hadError:1; // Returned TOK_ERROR from getToken. |
michael@0 | 480 | |
michael@0 | 481 | Flags() |
michael@0 | 482 | : isEOF(), isDirtyLine(), sawOctalEscape(), hadError() |
michael@0 | 483 | {} |
michael@0 | 484 | }; |
michael@0 | 485 | |
michael@0 | 486 | public: |
michael@0 | 487 | // Sometimes the parser needs to modify how tokens are created. A modifier is only passed on to getTokenInternal(), so it has no effect on a token that was already scanned and pushed back. |
michael@0 | 488 | enum Modifier |
michael@0 | 489 | { |
michael@0 | 490 | None, // Normal operation. |
michael@0 | 491 | Operand, // Looking for an operand, not an operator. In |
michael@0 | 492 | // practice, this means that when '/' is seen, |
michael@0 | 493 | // we look for a regexp instead of just returning |
michael@0 | 494 | // TOK_DIV. |
michael@0 | 495 | KeywordIsName, // Treat keywords as names by returning TOK_NAME. |
michael@0 | 496 | }; |
michael@0 | 497 | |
michael@0 | 498 | // Get the next token from the stream, make it the current token, and |
michael@0 | 499 | // return its kind. |
michael@0 | 500 | TokenKind getToken(Modifier modifier = None) { |
michael@0 | 501 | // Check for a pushed-back token resulting from mismatching lookahead. |
// On this path the token was already scanned, so |modifier| is not consulted:
// the cursor simply advances one slot (mod ntokens).
michael@0 | 502 | if (lookahead != 0) { |
michael@0 | 503 | lookahead--; |
michael@0 | 504 | cursor = (cursor + 1) & ntokensMask; |
michael@0 | 505 | TokenKind tt = currentToken().type; |
michael@0 | 506 | JS_ASSERT(tt != TOK_EOL); |
michael@0 | 507 | return tt; |
michael@0 | 508 | } |
michael@0 | 509 | |
// Nothing is pushed back: delegate to getTokenInternal(), passing |modifier|.
michael@0 | 510 | return getTokenInternal(modifier); |
michael@0 | 511 | } |
michael@0 | 512 | |
michael@0 | 513 | // Push the last scanned token back into the stream. Asserts that fewer than maxLookahead tokens are already pushed back, then steps |cursor| back one slot (mod ntokens). |
michael@0 | 514 | void ungetToken() { |
michael@0 | 515 | JS_ASSERT(lookahead < maxLookahead); |
michael@0 | 516 | lookahead++; |
michael@0 | 517 | cursor = (cursor - 1) & ntokensMask; |
michael@0 | 518 | } |
michael@0 | 519 | |
// Return the kind of the next token without making it current. If a token is
// already pushed back, its kind is read straight from the slot after |cursor|
// and |modifier| is not consulted; otherwise a token is scanned with
// getTokenInternal(modifier) and immediately pushed back.
michael@0 | 520 | TokenKind peekToken(Modifier modifier = None) { |
michael@0 | 521 | if (lookahead != 0) |
michael@0 | 522 | return tokens[(cursor + 1) & ntokensMask].type; |
michael@0 | 523 | TokenKind tt = getTokenInternal(modifier); |
michael@0 | 524 | ungetToken(); |
michael@0 | 525 | return tt; |
michael@0 | 526 | } |
michael@0 | 527 | |
// Like peekToken(), but returns the next token's position rather than its
// kind. When no token is pushed back, one is scanned and pushed back first, so
// the slot after |cursor| is populated before it is read.
michael@0 | 528 | TokenPos peekTokenPos(Modifier modifier = None) { |
michael@0 | 529 | if (lookahead != 0) |
michael@0 | 530 | return tokens[(cursor + 1) & ntokensMask].pos; |
michael@0 | 531 | getTokenInternal(modifier); |
michael@0 | 532 | ungetToken(); |
michael@0 | 533 | JS_ASSERT(lookahead != 0); |
michael@0 | 534 | return tokens[(cursor + 1) & ntokensMask].pos; |
michael@0 | 535 | } |
michael@0 | 536 | |
michael@0 | 537 | // This is like peekToken(), with one exception: if there is an EOL |
michael@0 | 538 | // between the end of the current token and the start of the next token, it |
michael@0 | 539 | // returns TOK_EOL. In that case, no token with TOK_EOL is actually |
michael@0 | 540 | // created, just a TOK_EOL TokenKind is returned, and currentToken() |
michael@0 | 541 | // shouldn't be consulted. (This is the only place TOK_EOL is produced.) |
michael@0 | 542 | MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) { |
michael@0 | 543 | const Token &curr = currentToken(); |
michael@0 | 544 | |
michael@0 | 545 | // If lookahead != 0, we have scanned ahead at least one token, and |
michael@0 | 546 | // |lineno| is the line that the furthest-scanned token ends on. If |
michael@0 | 547 | // it's the same as the line that the current token ends on, that's a |
michael@0 | 548 | // stronger condition than what we are looking for, and we don't need |
michael@0 | 549 | // to return TOK_EOL. |
michael@0 | 550 | if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno)) |
michael@0 | 551 | return tokens[(cursor + 1) & ntokensMask].type; |
michael@0 | 552 | |
michael@0 | 553 | // The above check misses two cases where we don't have to return |
michael@0 | 554 | // TOK_EOL. |
michael@0 | 555 | // - The next token starts on the same line, but is a multi-line token. |
michael@0 | 556 | // - The next token starts on the same line, but lookahead==2 and there |
michael@0 | 557 | // is a newline between the next token and the one after that. |
michael@0 | 558 | // The following test is somewhat expensive but gets these cases (and |
michael@0 | 559 | // all others) right. |
// |curr| and |next| are references into the |tokens| array, and ungetToken()
// only adjusts |cursor| and |lookahead|, so both remain usable after the
// push-back below.
michael@0 | 560 | (void)getToken(modifier); |
michael@0 | 561 | const Token &next = currentToken(); |
michael@0 | 562 | ungetToken(); |
michael@0 | 563 | return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin) |
michael@0 | 564 | ? next.type |
michael@0 | 565 | : TOK_EOL; |
michael@0 | 566 | } |
michael@0 | 567 | |
michael@0 | 568 | // Get the next token from the stream if its kind is |tt|. Returns true and leaves that token current on a match; otherwise pushes the token back and returns false. |
michael@0 | 569 | bool matchToken(TokenKind tt, Modifier modifier = None) { |
michael@0 | 570 | if (getToken(modifier) == tt) |
michael@0 | 571 | return true; |
michael@0 | 572 | ungetToken(); |
michael@0 | 573 | return false; |
michael@0 | 574 | } |
michael@0 | 575 | |
// Consume the next token, which the caller expects to be of kind |tt|. The
// matchToken() call is wrapped in JS_ALWAYS_TRUE and uses the default modifier.
// NOTE(review): JS_ALWAYS_TRUE presumably asserts the result in debug builds
// while still evaluating the call in release builds; confirm.
michael@0 | 576 | void consumeKnownToken(TokenKind tt) { |
michael@0 | 577 | JS_ALWAYS_TRUE(matchToken(tt)); |
michael@0 | 578 | } |
michael@0 | 579 | |
// Consume the next token if it is a TOK_NAME whose name compares equal (==)
// to |keyword|; otherwise push the token back and return false.
michael@0 | 580 | bool matchContextualKeyword(Handle<PropertyName*> keyword) { |
michael@0 | 581 | if (getToken() == TOK_NAME && currentToken().name() == keyword) |
michael@0 | 582 | return true; |
michael@0 | 583 | ungetToken(); |
michael@0 | 584 | return false; |
michael@0 | 585 | } |
michael@0 | 586 | |
// Looks up the kind returned by peekToken() in the isExprEnding[] table,
// which is indexed by TokenKind.
michael@0 | 587 | bool nextTokenEndsExpr() { |
michael@0 | 588 | return isExprEnding[peekToken()]; |
michael@0 | 589 | } |
michael@0 | 590 | |
// Saved scanner state passed to tell() and seek(). All data members are
// private; TokenStream reaches them as a friend. Copy construction is deleted.
michael@0 | 591 | class MOZ_STACK_CLASS Position { |
michael@0 | 592 | public: |
michael@0 | 593 | // The Token fields may contain pointers to atoms, so for correct |
michael@0 | 594 | // rooting we must ensure collection of atoms is disabled while objects |
michael@0 | 595 | // of this class are live. Do this by requiring a dummy AutoKeepAtoms |
michael@0 | 596 | // reference in the constructor. |
michael@0 | 597 | // |
michael@0 | 598 | // This class is explicitly ignored by the analysis, so don't add any |
michael@0 | 599 | // more pointers to GC things here! |
michael@0 | 600 | Position(AutoKeepAtoms&) { } |
michael@0 | 601 | private: |
michael@0 | 602 | Position(const Position&) MOZ_DELETE; |
michael@0 | 603 | friend class TokenStream; |
michael@0 | 604 | const jschar *buf; |
michael@0 | 605 | Flags flags; |
michael@0 | 606 | unsigned lineno; |
michael@0 | 607 | const jschar *linebase; |
michael@0 | 608 | const jschar *prevLinebase; |
michael@0 | 609 | Token currentToken; |
michael@0 | 610 | unsigned lookahead; |
michael@0 | 611 | Token lookaheadTokens[maxLookahead]; |
michael@0 | 612 | }; |
michael@0 | 613 | |
michael@0 | 614 | void advance(size_t position); |
michael@0 | 615 | void tell(Position *); |
michael@0 | 616 | void seek(const Position &pos); |
michael@0 | 617 | bool seek(const Position &pos, const TokenStream &other); |
michael@0 | 618 | |
// Returns the distance, in jschars, from the base of |userbuf| to |pos.buf|.
michael@0 | 619 | size_t positionToOffset(const Position &pos) const { |
michael@0 | 620 | return pos.buf - userbuf.base(); |
michael@0 | 621 | } |
michael@0 | 622 | |
// rawBase() and rawLimit() forward to userbuf.base() and userbuf.limit().
michael@0 | 623 | const jschar *rawBase() const { |
michael@0 | 624 | return userbuf.base(); |
michael@0 | 625 | } |
michael@0 | 626 | |
michael@0 | 627 | const jschar *rawLimit() const { |
michael@0 | 628 | return userbuf.limit(); |
michael@0 | 629 | } |
michael@0 | 630 | |
// hasDisplayURL() reports whether |displayURL_| is non-null; displayURL()
// returns that pointer (null when no display URL is set).
michael@0 | 631 | bool hasDisplayURL() const { |
michael@0 | 632 | return displayURL_ != nullptr; |
michael@0 | 633 | } |
michael@0 | 634 | |
michael@0 | 635 | jschar *displayURL() { |
michael@0 | 636 | return displayURL_; |
michael@0 | 637 | } |
michael@0 | 638 | |
// hasSourceMapURL() reports whether |sourceMapURL_| is non-null;
// sourceMapURL() returns that pointer (null when no source map URL is set).
michael@0 | 639 | bool hasSourceMapURL() const { |
michael@0 | 640 | return sourceMapURL_ != nullptr; |
michael@0 | 641 | } |
michael@0 | 642 | |
michael@0 | 643 | jschar *sourceMapURL() { |
michael@0 | 644 | return sourceMapURL_; |
michael@0 | 645 | } |
michael@0 | 646 | |
michael@0 | 647 | // If the name at s[0:length] is not a keyword in this version, return |
michael@0 | 648 | // true with *ttp unchanged. |
michael@0 | 649 | // |
michael@0 | 650 | // If it is a reserved word in this version and strictness mode, and thus |
michael@0 | 651 | // can't be present in correct code, report a SyntaxError and return false. |
michael@0 | 652 | // |
michael@0 | 653 | // If it is a keyword, like "if", the behavior depends on ttp. If ttp is |
michael@0 | 654 | // null, report a SyntaxError ("if is a reserved identifier") and return |
michael@0 | 655 | // false. If ttp is non-null, return true with the keyword's TokenKind in |
michael@0 | 656 | // *ttp. |
michael@0 | 657 | bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp); |
michael@0 | 658 | |
michael@0 | 659 | // This class maps a userbuf offset (which is 0-indexed) to a line number |
michael@0 | 660 | // (which is 1-indexed) and a column index (which is 0-indexed). |
michael@0 | 661 | class SourceCoords |
michael@0 | 662 | { |
michael@0 | 663 | // For a given buffer holding source code, |lineStartOffsets_| has one |
michael@0 | 664 | // element per line of source code, plus one sentinel element. Each |
michael@0 | 665 | // non-sentinel element holds the buffer offset for the start of the |
michael@0 | 666 | // corresponding line of source code. For this example script: |
michael@0 | 667 | // |
michael@0 | 668 | // 1 // xyz [line starts at offset 0] |
michael@0 | 669 | // 2 var x; [line starts at offset 7] |
michael@0 | 670 | // 3 [line starts at offset 14] |
michael@0 | 671 | // 4 var y; [line starts at offset 15] |
michael@0 | 672 | // |
michael@0 | 673 | // |lineStartOffsets_| is: |
michael@0 | 674 | // |
michael@0 | 675 | // [0, 7, 14, 15, MAX_PTR] |
michael@0 | 676 | // |
michael@0 | 677 | // To convert a "line number" to a "line index" (i.e. an index into |
michael@0 | 678 | // |lineStartOffsets_|), subtract |initialLineNum_|. E.g. line 3's |
michael@0 | 679 | // line index is (3 - initialLineNum_), which is 2. Therefore |
michael@0 | 680 | // lineStartOffsets_[2] holds the buffer offset for the start of line 3, |
michael@0 | 681 | // which is 14. (Note that |initialLineNum_| is often 1, but not |
michael@0 | 682 | // always.) |
michael@0 | 683 | // |
michael@0 | 684 | // The first element is always 0, and the last element is always the |
michael@0 | 685 | // MAX_PTR sentinel. |
michael@0 | 686 | // |
michael@0 | 687 | // offset-to-line/column lookups are O(log n) in the worst case (binary |
michael@0 | 688 | // search), but in practice they're heavily clustered and we do better |
michael@0 | 689 | // than that by using the previous lookup's result (lastLineIndex_) as |
michael@0 | 690 | // a starting point. |
michael@0 | 691 | // |
michael@0 | 692 | // Checking if an offset lies within a particular line number |
michael@0 | 693 | // (isOnThisLine()) is O(1). |
michael@0 | 694 | // |
michael@0 | 695 | Vector<uint32_t, 128> lineStartOffsets_; |
michael@0 | 696 | uint32_t initialLineNum_; |
michael@0 | 697 | |
michael@0 | 698 | // This is mutable because it's modified on every search, but that fact |
michael@0 | 699 | // isn't visible outside this class. |
michael@0 | 700 | mutable uint32_t lastLineIndex_; |
michael@0 | 701 | |
michael@0 | 702 | uint32_t lineIndexOf(uint32_t offset) const; |
michael@0 | 703 | |
michael@0 | 704 | static const uint32_t MAX_PTR = UINT32_MAX; /* sentinel value kept as the last element of lineStartOffsets_ */ |
michael@0 | 705 | |
michael@0 | 706 | uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; } |
michael@0 | 707 | uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; } |
michael@0 | 708 | |
michael@0 | 709 | public: |
michael@0 | 710 | SourceCoords(ExclusiveContext *cx, uint32_t ln); |
michael@0 | 711 | |
michael@0 | 712 | void add(uint32_t lineNum, uint32_t lineStartOffset); |
michael@0 | 713 | bool fill(const SourceCoords &other); |
michael@0 | 714 | |
michael@0 | 715 | bool isOnThisLine(uint32_t offset, uint32_t lineNum) const { /* true iff offset lies in [start of lineNum, next entry of lineStartOffsets_) */ |
michael@0 | 716 | uint32_t lineIndex = lineNumToIndex(lineNum); |
michael@0 | 717 | JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length()); // +1 due to sentinel |
michael@0 | 718 | return lineStartOffsets_[lineIndex] <= offset && |
michael@0 | 719 | offset < lineStartOffsets_[lineIndex + 1]; |
michael@0 | 720 | } |
michael@0 | 721 | |
michael@0 | 722 | uint32_t lineNum(uint32_t offset) const; |
michael@0 | 723 | uint32_t columnIndex(uint32_t offset) const; |
michael@0 | 724 | void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const; |
michael@0 | 725 | }; |
michael@0 | 726 | |
michael@0 | 727 | SourceCoords srcCoords; |
michael@0 | 728 | |
michael@0 | 729 | JSAtomState &names() const { /* forwards to cx->names() */ |
michael@0 | 730 | return cx->names(); |
michael@0 | 731 | } |
michael@0 | 732 | |
michael@0 | 733 | ExclusiveContext *context() const { /* the cx pointer stored in this TokenStream */ |
michael@0 | 734 | return cx; |
michael@0 | 735 | } |
michael@0 | 736 | |
michael@0 | 737 | const ReadOnlyCompileOptions &options() const { /* the options_ reference held by this TokenStream */ |
michael@0 | 738 | return options_; |
michael@0 | 739 | } |
michael@0 | 740 | |
michael@0 | 741 | private: |
michael@0 | 742 | // This is the low-level interface to the JS source code buffer. It just |
michael@0 | 743 | // gets raw chars, basically. TokenStream's functions are layered on top |
michael@0 | 744 | // and do some extra stuff like converting all EOL sequences to '\n', |
michael@0 | 745 | // tracking the line number, and setting |flags.isEOF|. (The "raw" in "raw |
michael@0 | 746 | // chars" refers to the lack of EOL sequence normalization.) |
michael@0 | 747 | class TokenBuf { |
michael@0 | 748 | public: |
michael@0 | 749 | TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length) /* cx is not used by this constructor */ |
michael@0 | 750 | : base_(buf), limit_(buf + length), ptr(buf) |
michael@0 | 751 | { } |
michael@0 | 752 | |
michael@0 | 753 | bool hasRawChars() const { |
michael@0 | 754 | return ptr < limit_; |
michael@0 | 755 | } |
michael@0 | 756 | |
michael@0 | 757 | bool atStart() const { |
michael@0 | 758 | return ptr == base_; |
michael@0 | 759 | } |
michael@0 | 760 | |
michael@0 | 761 | const jschar *base() const { |
michael@0 | 762 | return base_; |
michael@0 | 763 | } |
michael@0 | 764 | |
michael@0 | 765 | const jschar *limit() const { |
michael@0 | 766 | return limit_; |
michael@0 | 767 | } |
michael@0 | 768 | |
michael@0 | 769 | jschar getRawChar() { /* returns *ptr and advances ptr; no check against limit_ is made here */ |
michael@0 | 770 | return *ptr++; // this will nullptr-crash if poisoned |
michael@0 | 771 | } |
michael@0 | 772 | |
michael@0 | 773 | jschar peekRawChar() const { |
michael@0 | 774 | return *ptr; // this will nullptr-crash if poisoned |
michael@0 | 775 | } |
michael@0 | 776 | |
michael@0 | 777 | bool matchRawChar(jschar c) { /* advances ptr by one only when *ptr equals c */ |
michael@0 | 778 | if (*ptr == c) { // this will nullptr-crash if poisoned |
michael@0 | 779 | ptr++; |
michael@0 | 780 | return true; |
michael@0 | 781 | } |
michael@0 | 782 | return false; |
michael@0 | 783 | } |
michael@0 | 784 | |
michael@0 | 785 | bool matchRawCharBackwards(jschar c) { |
michael@0 | 786 | JS_ASSERT(ptr); // make sure it hasn't been poisoned |
michael@0 | 787 | if (*(ptr - 1) == c) { /* reads the char before ptr; no check against base_ is made here -- NOTE(review): confirm callers test atStart() first */ |
michael@0 | 788 | ptr--; |
michael@0 | 789 | return true; |
michael@0 | 790 | } |
michael@0 | 791 | return false; |
michael@0 | 792 | } |
michael@0 | 793 | |
michael@0 | 794 | void ungetRawChar() { /* steps ptr back by one; no check against base_ is made here */ |
michael@0 | 795 | JS_ASSERT(ptr); // make sure it hasn't been poisoned |
michael@0 | 796 | ptr--; |
michael@0 | 797 | } |
michael@0 | 798 | |
michael@0 | 799 | const jschar *addressOfNextRawChar(bool allowPoisoned = false) const { |
michael@0 | 800 | JS_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned |
michael@0 | 801 | return ptr; |
michael@0 | 802 | } |
michael@0 | 803 | |
michael@0 | 804 | // Use this with caution! |
michael@0 | 805 | void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) { |
michael@0 | 806 | JS_ASSERT_IF(!allowPoisoned, a); |
michael@0 | 807 | ptr = a; |
michael@0 | 808 | } |
michael@0 | 809 | |
michael@0 | 810 | #ifdef DEBUG |
michael@0 | 811 | // Poison the TokenBuf so it cannot be accessed again. |
michael@0 | 812 | void poison() { |
michael@0 | 813 | ptr = nullptr; |
michael@0 | 814 | } |
michael@0 | 815 | #endif |
michael@0 | 816 | |
michael@0 | 817 | static bool isRawEOLChar(int32_t c) { |
michael@0 | 818 | return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR; |
michael@0 | 819 | } |
michael@0 | 820 | |
michael@0 | 821 | // Finds the next EOL, but stops once 'max' jschars have been scanned |
michael@0 | 822 | // (*including* the starting jschar). |
michael@0 | 823 | const jschar *findEOLMax(const jschar *p, size_t max); |
michael@0 | 824 | |
michael@0 | 825 | private: |
michael@0 | 826 | const jschar *base_; // base of buffer |
michael@0 | 827 | const jschar *limit_; // limit for quick bounds check |
michael@0 | 828 | const jschar *ptr; // next char to get |
michael@0 | 829 | }; |
michael@0 | 830 | |
michael@0 | 831 | TokenKind getTokenInternal(Modifier modifier); |
michael@0 | 832 | |
michael@0 | 833 | int32_t getChar(); |
michael@0 | 834 | int32_t getCharIgnoreEOL(); |
michael@0 | 835 | void ungetChar(int32_t c); |
michael@0 | 836 | void ungetCharIgnoreEOL(int32_t c); |
michael@0 | 837 | Token *newToken(ptrdiff_t adjust); |
michael@0 | 838 | bool peekUnicodeEscape(int32_t *c); |
michael@0 | 839 | bool matchUnicodeEscapeIdStart(int32_t *c); |
michael@0 | 840 | bool matchUnicodeEscapeIdent(int32_t *c); |
michael@0 | 841 | bool peekChars(int n, jschar *cp); |
michael@0 | 842 | |
michael@0 | 843 | bool getDirectives(bool isMultiline, bool shouldWarnDeprecated); |
michael@0 | 844 | bool getDirective(bool isMultiline, bool shouldWarnDeprecated, |
michael@0 | 845 | const char *directive, int directiveLength, |
michael@0 | 846 | const char *errorMsgPragma, jschar **destination); |
michael@0 | 847 | bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated); |
michael@0 | 848 | bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated); |
michael@0 | 849 | |
michael@0 | 850 | // Consumes the next raw char iff it equals |expect|; returns false when no raw chars remain. |expect| cannot be an EOL char. |
michael@0 | 851 | bool matchChar(int32_t expect) { |
michael@0 | 852 | MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect)); |
michael@0 | 853 | return MOZ_LIKELY(userbuf.hasRawChars()) && |
michael@0 | 854 | userbuf.matchRawChar(expect); |
michael@0 | 855 | } |
michael@0 | 856 | |
michael@0 | 857 | void consumeKnownChar(int32_t expect) { /* reads one char via getChar(); the result is only compared with expect inside JS_ASSERT */ |
michael@0 | 858 | mozilla::DebugOnly<int32_t> c = getChar(); |
michael@0 | 859 | JS_ASSERT(c == expect); |
michael@0 | 860 | } |
michael@0 | 861 | |
michael@0 | 862 | int32_t peekChar() { /* calls getChar(), hands the result straight back to ungetChar(), and returns it */ |
michael@0 | 863 | int32_t c = getChar(); |
michael@0 | 864 | ungetChar(c); |
michael@0 | 865 | return c; |
michael@0 | 866 | } |
michael@0 | 867 | |
michael@0 | 868 | void skipChars(int n) { /* calls getChar() n times and discards the results; does nothing when n <= 0 */ |
michael@0 | 869 | while (--n >= 0) |
michael@0 | 870 | getChar(); |
michael@0 | 871 | } |
michael@0 | 872 | |
michael@0 | 873 | void updateLineInfoForEOL(); |
michael@0 | 874 | void updateFlagsForEOL(); |
michael@0 | 875 | |
michael@0 | 876 | // Options used for parsing/tokenizing. |
michael@0 | 877 | const ReadOnlyCompileOptions &options_; |
michael@0 | 878 | |
michael@0 | 879 | Token tokens[ntokens]; // circular token buffer |
michael@0 | 880 | unsigned cursor; // index of last parsed token |
michael@0 | 881 | unsigned lookahead; // count of lookahead tokens |
michael@0 | 882 | unsigned lineno; // current line number |
michael@0 | 883 | Flags flags; // flags -- see above |
michael@0 | 884 | const jschar *linebase; // start of current line; points into userbuf |
michael@0 | 885 | const jschar *prevLinebase; // start of previous line; nullptr if on the first line |
michael@0 | 886 | TokenBuf userbuf; // user input buffer |
michael@0 | 887 | const char *filename; // input filename or null |
michael@0 | 888 | jschar *displayURL_; // the user's requested source URL or null |
michael@0 | 889 | jschar *sourceMapURL_; // source map's filename or null |
michael@0 | 890 | CharBuffer tokenbuf; // current token string buffer |
michael@0 | 891 | bool maybeEOL[256]; // probabilistic EOL lookup table |
michael@0 | 892 | bool maybeStrSpecial[256]; // speeds up string scanning |
michael@0 | 893 | uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs? |
michael@0 | 894 | ExclusiveContext *const cx; |
michael@0 | 895 | JSPrincipals *const originPrincipals; |
michael@0 | 896 | StrictModeGetter *strictModeGetter; // used to test for strict mode |
michael@0 | 897 | }; |
michael@0 | 898 | |
michael@0 | 899 | // Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error |
michael@0 | 900 | // message have const jschar* type, not const char*. |
michael@0 | 901 | #define JSREPORT_UC 0x100 |
michael@0 | 902 | |
michael@0 | 903 | } // namespace frontend |
michael@0 | 904 | } // namespace js |
michael@0 | 905 | |
michael@0 | 906 | extern JS_FRIEND_API(int) |
michael@0 | 907 | js_fgets(char *buf, int size, FILE *file); |
michael@0 | 908 | |
michael@0 | 909 | #ifdef DEBUG |
michael@0 | 910 | extern const char * |
michael@0 | 911 | TokenKindToString(js::frontend::TokenKind tt); |
michael@0 | 912 | #endif |
michael@0 | 913 | |
michael@0 | 914 | #endif /* frontend_TokenStream_h */ |