The Tor Browser: js/src/frontend/TokenStream.h@129ffea94266

Conditionally enable double-key logic, depending on private browsing mode or
the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
Some reservations remain about how to convince FindCookie callers to test the
condition and pass a nullptr when double-key logic is disabled.

     1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-

     2  * vim: set ts=8 sts=4 et sw=4 tw=99:

     3  * This Source Code Form is subject to the terms of the Mozilla Public

     4  * License, v. 2.0. If a copy of the MPL was not distributed with this

     5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

     7 #ifndef frontend_TokenStream_h

     8 #define frontend_TokenStream_h

    10 // JS lexical scanner interface.

    12 #include "mozilla/DebugOnly.h"

    13 #include "mozilla/PodOperations.h"

    15 #include <stdarg.h>

    16 #include <stddef.h>

    17 #include <stdio.h>

    19 #include "jscntxt.h"

    20 #include "jspubtd.h"

    22 #include "js/Vector.h"

    23 #include "vm/RegExpObject.h"

    25 namespace js {

    26 namespace frontend {

    28 // Values of this type are used to index into arrays such as isExprEnding[],

    29 // so the first value must be zero.

    30 enum TokenKind {

    31     TOK_ERROR = 0,                 // well-known as the only code < EOF

    32     TOK_EOF,                       // end of file

    33     TOK_EOL,                       // end of line; only returned by peekTokenSameLine()

    34     TOK_SEMI,                      // semicolon

    35     TOK_COMMA,                     // comma operator

    36     TOK_HOOK, TOK_COLON,           // conditional (?:)

    37     TOK_INC, TOK_DEC,              // increment/decrement (++ --)

    38     TOK_DOT,                       // member operator (.)

    39     TOK_TRIPLEDOT,                 // for rest arguments (...)

    40     TOK_LB, TOK_RB,                // left and right brackets

    41     TOK_LC, TOK_RC,                // left and right curlies (braces)

    42     TOK_LP, TOK_RP,                // left and right parentheses

    43     TOK_NAME,                      // identifier

    44     TOK_NUMBER,                    // numeric constant

    45     TOK_STRING,                    // string constant

    46     TOK_REGEXP,                    // RegExp constant

    47     TOK_TRUE,                      // true

    48     TOK_FALSE,                     // false

    49     TOK_NULL,                      // null

    50     TOK_THIS,                      // this

    51     TOK_FUNCTION,                  // function keyword

    52     TOK_IF,                        // if keyword

    53     TOK_ELSE,                      // else keyword

    54     TOK_SWITCH,                    // switch keyword

    55     TOK_CASE,                      // case keyword

    56     TOK_DEFAULT,                   // default keyword

    57     TOK_WHILE,                     // while keyword

    58     TOK_DO,                        // do keyword

    59     TOK_FOR,                       // for keyword

    60     TOK_BREAK,                     // break keyword

    61     TOK_CONTINUE,                  // continue keyword

    62     TOK_VAR,                       // var keyword

    63     TOK_CONST,                     // const keyword

    64     TOK_WITH,                      // with keyword

    65     TOK_RETURN,                    // return keyword

    66     TOK_NEW,                       // new keyword

    67     TOK_DELETE,                    // delete keyword

    68     TOK_TRY,                       // try keyword

    69     TOK_CATCH,                     // catch keyword

    70     TOK_FINALLY,                   // finally keyword

    71     TOK_THROW,                     // throw keyword

    72     TOK_DEBUGGER,                  // debugger keyword

    73     TOK_YIELD,                     // yield from generator function

    74     TOK_LET,                       // let keyword

    75     TOK_EXPORT,                    // export keyword

    76     TOK_IMPORT,                    // import keyword

    77     TOK_RESERVED,                  // reserved keywords

    78     TOK_STRICT_RESERVED,           // reserved keywords in strict mode

    80     // The following token types occupy contiguous ranges to enable easy

    81     // range-testing.

    83     // Binary operators tokens, TOK_OR thru TOK_MOD. These must be in the same

    84     // order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h.

    85     TOK_OR,                        // logical or (||)

    86     TOK_BINOP_FIRST = TOK_OR,

    87     TOK_AND,                       // logical and (&&)

    88     TOK_BITOR,                     // bitwise-or (|)

    89     TOK_BITXOR,                    // bitwise-xor (^)

    90     TOK_BITAND,                    // bitwise-and (&)

    92     // Equality operation tokens, per TokenKindIsEquality.

    93     TOK_STRICTEQ,

    94     TOK_EQUALITY_START = TOK_STRICTEQ,

    95     TOK_EQ,

    96     TOK_STRICTNE,

    97     TOK_NE,

    98     TOK_EQUALITY_LAST = TOK_NE,

   100     // Relational ops (< <= > >=), per TokenKindIsRelational.

   101     TOK_LT,

   102     TOK_RELOP_START = TOK_LT,

   103     TOK_LE,

   104     TOK_GT,

   105     TOK_GE,

   106     TOK_RELOP_LAST = TOK_GE,

   108     TOK_INSTANCEOF,                // |instanceof| keyword

   109     TOK_IN,                        // |in| keyword

   111     // Shift ops (<< >> >>>), per TokenKindIsShift.

   112     TOK_LSH,

   113     TOK_SHIFTOP_START = TOK_LSH,

   114     TOK_RSH,

   115     TOK_URSH,

   116     TOK_SHIFTOP_LAST = TOK_URSH,

   118     TOK_ADD,

   119     TOK_SUB,

   120     TOK_MUL,

   121     TOK_DIV,

   122     TOK_MOD,

   123     TOK_BINOP_LAST = TOK_MOD,

   125     // Unary operation tokens.

   126     TOK_TYPEOF,

   127     TOK_VOID,

   128     TOK_NOT,

   129     TOK_BITNOT,

   131     TOK_ARROW,                     // function arrow (=>)

   133     // Assignment ops (= += -= etc.), per TokenKindIsAssignment

   134     TOK_ASSIGN,

   135     TOK_ASSIGNMENT_START = TOK_ASSIGN,

   136     TOK_ADDASSIGN,

   137     TOK_SUBASSIGN,

   138     TOK_BITORASSIGN,

   139     TOK_BITXORASSIGN,

   140     TOK_BITANDASSIGN,

   141     TOK_LSHASSIGN,

   142     TOK_RSHASSIGN,

   143     TOK_URSHASSIGN,

   144     TOK_MULASSIGN,

   145     TOK_DIVASSIGN,

   146     TOK_MODASSIGN,

   147     TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

   149     TOK_LIMIT                      // domain size

   150 };

   152 inline bool

   153 TokenKindIsBinaryOp(TokenKind tt)

   154 {

   155     return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST;

   156 }

   158 inline bool

   159 TokenKindIsEquality(TokenKind tt)

   160 {

   161     return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST;

   162 }

   164 inline bool

   165 TokenKindIsRelational(TokenKind tt)

   166 {

   167     return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST;

   168 }

   170 inline bool

   171 TokenKindIsShift(TokenKind tt)

   172 {

   173     return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST;

   174 }

   176 inline bool

   177 TokenKindIsAssignment(TokenKind tt)

   178 {

   179     return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST;

   180 }

   182 inline bool

   183 TokenKindIsDecl(TokenKind tt)

   184 {

   185     return tt == TOK_VAR || tt == TOK_LET;

   186 }

   188 struct TokenPos {

   189     uint32_t    begin;  // Offset of the token's first char.

   190     uint32_t    end;    // Offset of 1 past the token's last char.

   192     TokenPos() {}

   193     TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}

   195     // Return a TokenPos that covers left, right, and anything in between.

   196     static TokenPos box(const TokenPos &left, const TokenPos &right) {

   197         JS_ASSERT(left.begin <= left.end);

   198         JS_ASSERT(left.end <= right.begin);

   199         JS_ASSERT(right.begin <= right.end);

   200         return TokenPos(left.begin, right.end);

   201     }

   203     bool operator==(const TokenPos& bpos) const {

   204         return begin == bpos.begin && end == bpos.end;

   205     }

   207     bool operator!=(const TokenPos& bpos) const {

   208         return begin != bpos.begin || end != bpos.end;

   209     }

   211     bool operator <(const TokenPos& bpos) const {

   212         return begin < bpos.begin;

   213     }

   215     bool operator <=(const TokenPos& bpos) const {

   216         return begin <= bpos.begin;

   217     }

   219     bool operator >(const TokenPos& bpos) const {

   220         return !(*this <= bpos);

   221     }

   223     bool operator >=(const TokenPos& bpos) const {

   224         return !(*this < bpos);

   225     }

   227     bool encloses(const TokenPos& pos) const {

   228         return begin <= pos.begin && pos.end <= end;

   229     }

   230 };

   232 enum DecimalPoint { NoDecimal = false, HasDecimal = true };

   234 struct Token

   235 {

   236     TokenKind           type;           // char value or above enumerator

   237     TokenPos            pos;            // token position in file

   238     union {

   239       private:

   240         friend struct Token;

   241         PropertyName    *name;          // non-numeric atom

   242         JSAtom          *atom;          // potentially-numeric atom

   243         struct {

   244             double      value;          // floating point number

   245             DecimalPoint decimalPoint;  // literal contains '.'

   246         } number;

   247         RegExpFlag      reflags;        // regexp flags; use tokenbuf to access

   248                                         //   regexp chars

   249     } u;

   251     // This constructor is necessary only for MSVC 2013 and how it compiles the

   252     // initialization of TokenStream::tokens.  That field is initialized as

   253     // tokens() in the constructor init-list.  This *should* zero the entire

   254     // array, then (because Token has a non-trivial constructor, because

   255     // TokenPos has a user-provided constructor) call the implicit Token

   256     // constructor on each element, which would call the TokenPos constructor

   257     // for Token::pos and do nothing.  (All of which is equivalent to just

   258     // zeroing TokenStream::tokens.)  But MSVC 2013 (2010/2012 don't have this

   259     // bug) doesn't zero out each element, so we need this extra constructor to

   260     // make it do the right thing.  (Token is used primarily by reference or

   261     // pointer, and it's only initialized a very few places, so having a

   262     // user-defined constructor won't hurt perf.)  See also bug 920318.

   263     Token()

   264       : type(TOK_ERROR),

   265         pos(0, 0)

   266     {

   267     }

   269     // Mutators

   271     void setName(PropertyName *name) {

   272         JS_ASSERT(type == TOK_NAME);

   273         JS_ASSERT(!IsPoisonedPtr(name));

   274         u.name = name;

   275     }

   277     void setAtom(JSAtom *atom) {

   278         JS_ASSERT(type == TOK_STRING);

   279         JS_ASSERT(!IsPoisonedPtr(atom));

   280         u.atom = atom;

   281     }

   283     void setRegExpFlags(js::RegExpFlag flags) {

   284         JS_ASSERT(type == TOK_REGEXP);

   285         JS_ASSERT((flags & AllFlags) == flags);

   286         u.reflags = flags;

   287     }

   289     void setNumber(double n, DecimalPoint decimalPoint) {

   290         JS_ASSERT(type == TOK_NUMBER);

   291         u.number.value = n;

   292         u.number.decimalPoint = decimalPoint;

   293     }

   295     // Type-safe accessors

   297     PropertyName *name() const {

   298         JS_ASSERT(type == TOK_NAME);

   299         return u.name->asPropertyName(); // poor-man's type verification

   300     }

   302     JSAtom *atom() const {

   303         JS_ASSERT(type == TOK_STRING);

   304         return u.atom;

   305     }

   307     js::RegExpFlag regExpFlags() const {

   308         JS_ASSERT(type == TOK_REGEXP);

   309         JS_ASSERT((u.reflags & AllFlags) == u.reflags);

   310         return u.reflags;

   311     }

   313     double number() const {

   314         JS_ASSERT(type == TOK_NUMBER);

   315         return u.number.value;

   316     }

   318     DecimalPoint decimalPoint() const {

   319         JS_ASSERT(type == TOK_NUMBER);

   320         return u.number.decimalPoint;

   321     }

   322 };

   324 struct CompileError {

   325     JSErrorReport report;

   326     char *message;

   327     ErrorArgumentsType argumentsType;

   328     CompileError()

   329       : message(nullptr), argumentsType(ArgumentsAreUnicode)

   330     {

   331         mozilla::PodZero(&report);

   332     }

   333     ~CompileError();

   334     void throwError(JSContext *cx);

   336   private:

   337     // CompileError owns raw allocated memory, so disable assignment and copying

   338     // for safety.

   339     void operator=(const CompileError &) MOZ_DELETE;

   340     CompileError(const CompileError &) MOZ_DELETE;

   341 };

   343 // Ideally, tokenizing would be entirely independent of context.  But the

   344 // strict mode flag, which is in SharedContext, affects tokenizing, and

   345 // TokenStream needs to see it.

   346 //

   347 // This class is a tiny back-channel from TokenStream to the strict mode flag

   348 // that avoids exposing the rest of SharedContext to TokenStream.

   349 //

   350 class StrictModeGetter {

   351   public:

   352     virtual bool strictMode() = 0;

   353 };

   355 // TokenStream is the lexical scanner for Javascript source text.

   356 //

   357 // It takes a buffer of jschars and linearly scans it into |Token|s.

   358 // Internally the class uses a four element circular buffer |tokens| of

   359 // |Token|s. As an index for |tokens|, the member |cursor| points to the

   360 // current token.

   361 // Calls to getToken() increase |cursor| by one and return the new current

   362 // token. If a TokenStream was just created, the current token is initialized

   363 // with random data (i.e. not initialized). It is therefore important that

   364 // one of the first four member functions listed below is called first.

   365 // The circular buffer lets us go back up to two tokens from the last

   366 // scanned token. Internally, the relative number of backward steps that were

   367 // taken (via ungetToken()) after the last token was scanned is stored in

   368 // |lookahead|.

   369 //

   370 // The following table lists in which situations it is safe to call each listed

   371 // function. No checks are made by the functions in non-debug builds.

   372 //

   373 // Function Name     | Precondition; changes to |lookahead|

   374 // ------------------+---------------------------------------------------------

   375 // getToken          | none; if |lookahead > 0| then |lookahead--|

   376 // peekToken         | none; if |lookahead == 0| then |lookahead == 1|

   377 // peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|

   378 // matchToken        | none; if |lookahead > 0| and the match succeeds then

   379 //                   |       |lookahead--|

   380 // consumeKnownToken | none; if |lookahead > 0| then |lookahead--|

   381 // ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|

   382 //

   383 // The behavior of the token scanning process (see getTokenInternal()) can be

   384 // modified by calling one of the first four above listed member functions with

   385 // an optional argument of type Modifier.  However, the modifier will be

   386 // ignored unless |lookahead == 0| holds.  Due to constraints of the grammar,

   387 // this turns out not to be a problem in practice. See the

   388 // mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'

   389 // for more details:

   390 // https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E

   391 //

   392 // The methods seek() and tell() allow rescanning from a previously visited

   393 // location in the buffer.

   394 //

   395 class MOZ_STACK_CLASS TokenStream

   396 {

   397     // Unicode separators that are treated as line terminators, in addition to \n, \r.

   398     enum {

   399         LINE_SEPARATOR = 0x2028,

   400         PARA_SEPARATOR = 0x2029

   401     };

   403     static const size_t ntokens = 4;                // 1 current + 2 lookahead, rounded

   404                                                     // to power of 2 to avoid divmod by 3

   405     static const unsigned maxLookahead = 2;

   406     static const unsigned ntokensMask = ntokens - 1;

   408   public:

   409     typedef Vector<jschar, 32> CharBuffer;

   411     TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,

   412                 const jschar *base, size_t length, StrictModeGetter *smg);

   414     ~TokenStream();

   416     // Accessors.

   417     const Token &currentToken() const { return tokens[cursor]; }

   418     bool isCurrentTokenType(TokenKind type) const {

   419         return currentToken().type == type;

   420     }

   421     const CharBuffer &getTokenbuf() const { return tokenbuf; }

   422     const char *getFilename() const { return filename; }

   423     unsigned getLineno() const { return lineno; }

   424     unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }

   425     JSPrincipals *getOriginPrincipals() const { return originPrincipals; }

   426     JSVersion versionNumber() const { return VersionNumber(options().version); }

   427     JSVersion versionWithFlags() const { return options().version; }

   429     PropertyName *currentName() const {

   430         if (isCurrentTokenType(TOK_YIELD))

   431             return cx->names().yield;

   432         JS_ASSERT(isCurrentTokenType(TOK_NAME));

   433         return currentToken().name();

   434     }

   436     bool isCurrentTokenAssignment() const {

   437         return TokenKindIsAssignment(currentToken().type);

   438     }

   440     // Flag methods.

   441     bool isEOF() const { return flags.isEOF; }

   442     bool sawOctalEscape() const { return flags.sawOctalEscape; }

   443     bool hadError() const { return flags.hadError; }

   445     // TokenStream-specific error reporters.

   446     bool reportError(unsigned errorNumber, ...);

   447     bool reportWarning(unsigned errorNumber, ...);

   449     static const uint32_t NoOffset = UINT32_MAX;

   451     // General-purpose error reporters.  You should avoid calling these

   452     // directly, and instead use the more succinct alternatives (e.g.

   453     // reportError()) in TokenStream, Parser, and BytecodeEmitter.

   454     bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,

   455                                     va_list args);

   456     bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,

   457                                        va_list args);

   458     bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,

   459                                           va_list args);

   461     // asm.js reporter

   462     void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);

   464   private:

   465     // These are private because they should only be called by the tokenizer

   466     // while tokenizing not by, for example, BytecodeEmitter.

   467     bool reportStrictModeError(unsigned errorNumber, ...);

   468     bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }

   470     void onError();

   471     static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb);

   472     bool putIdentInTokenbuf(const jschar *identStart);

   474     struct Flags

   475     {

   476         bool isEOF:1;           // Hit end of file.

   477         bool isDirtyLine:1;     // Non-whitespace since start of line.

   478         bool sawOctalEscape:1;  // Saw an octal character escape.

   479         bool hadError:1;        // Returned TOK_ERROR from getToken.

   481         Flags()

   482           : isEOF(), isDirtyLine(), sawOctalEscape(), hadError()

   483         {}

   484     };

   486   public:

   487     // Sometimes the parser needs to modify how tokens are created.

   488     enum Modifier

   489     {

   490         None,           // Normal operation.

   491         Operand,        // Looking for an operand, not an operator.  In

   492                         //   practice, this means that when '/' is seen,

   493                         //   we look for a regexp instead of just returning

   494                         //   TOK_DIV.

   495         KeywordIsName,  // Treat keywords as names by returning TOK_NAME.

   496     };

   498     // Get the next token from the stream, make it the current token, and

   499     // return its kind.

   500     TokenKind getToken(Modifier modifier = None) {

   501         // Check for a pushed-back token resulting from mismatching lookahead.

   502         if (lookahead != 0) {

   503             lookahead--;

   504             cursor = (cursor + 1) & ntokensMask;

   505             TokenKind tt = currentToken().type;

   506             JS_ASSERT(tt != TOK_EOL);

   507             return tt;

   508         }

   510         return getTokenInternal(modifier);

   511     }

   513     // Push the last scanned token back into the stream.

   514     void ungetToken() {

   515         JS_ASSERT(lookahead < maxLookahead);

   516         lookahead++;

   517         cursor = (cursor - 1) & ntokensMask;

   518     }

   520     TokenKind peekToken(Modifier modifier = None) {

   521         if (lookahead != 0)

   522             return tokens[(cursor + 1) & ntokensMask].type;

   523         TokenKind tt = getTokenInternal(modifier);

   524         ungetToken();

   525         return tt;

   526     }

   528     TokenPos peekTokenPos(Modifier modifier = None) {

   529         if (lookahead != 0)

   530             return tokens[(cursor + 1) & ntokensMask].pos;

   531         getTokenInternal(modifier);

   532         ungetToken();

   533         JS_ASSERT(lookahead != 0);

   534         return tokens[(cursor + 1) & ntokensMask].pos;

   535     }

   537     // This is like peekToken(), with one exception:  if there is an EOL

   538     // between the end of the current token and the start of the next token, it

   539     // returns TOK_EOL.  In that case, no token with TOK_EOL is actually

   540     // created, just a TOK_EOL TokenKind is returned, and currentToken()

   541     // shouldn't be consulted.  (This is the only place TOK_EOL is produced.)

   542     MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) {

   543        const Token &curr = currentToken();

   545         // If lookahead != 0, we have scanned ahead at least one token, and

   546         // |lineno| is the line that the furthest-scanned token ends on.  If

   547         // it's the same as the line that the current token ends on, that's a

   548         // stronger condition than what we are looking for, and we don't need

   549         // to return TOK_EOL.

   550         if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno))

   551             return tokens[(cursor + 1) & ntokensMask].type;

   553         // The above check misses two cases where we don't have to return

   554         // TOK_EOL.

   555         // - The next token starts on the same line, but is a multi-line token.

   556         // - The next token starts on the same line, but lookahead==2 and there

   557         //   is a newline between the next token and the one after that.

   558         // The following test is somewhat expensive but gets these cases (and

   559         // all others) right.

   560         (void)getToken(modifier);

   561         const Token &next = currentToken();

   562         ungetToken();

   563         return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)

   564                ? next.type

   565                : TOK_EOL;

   566     }

   568     // Get the next token from the stream if its kind is |tt|.

   569     bool matchToken(TokenKind tt, Modifier modifier = None) {

   570         if (getToken(modifier) == tt)

   571             return true;

   572         ungetToken();

   573         return false;

   574     }

   576     void consumeKnownToken(TokenKind tt) {

   577         JS_ALWAYS_TRUE(matchToken(tt));

   578     }

   580     bool matchContextualKeyword(Handle<PropertyName*> keyword) {

   581         if (getToken() == TOK_NAME && currentToken().name() == keyword)

   582             return true;

   583         ungetToken();

   584         return false;

   585     }

   587     bool nextTokenEndsExpr() {

   588         return isExprEnding[peekToken()];

   589     }

   591     class MOZ_STACK_CLASS Position {

   592       public:

   593         // The Token fields may contain pointers to atoms, so for correct

   594         // rooting we must ensure collection of atoms is disabled while objects

   595         // of this class are live.  Do this by requiring a dummy AutoKeepAtoms

   596         // reference in the constructor.

   597         //

   598         // This class is explicity ignored by the analysis, so don't add any

   599         // more pointers to GC things here!

   600         Position(AutoKeepAtoms&) { }

   601       private:

   602         Position(const Position&) MOZ_DELETE;

   603         friend class TokenStream;

   604         const jschar *buf;

   605         Flags flags;

   606         unsigned lineno;

   607         const jschar *linebase;

   608         const jschar *prevLinebase;

   609         Token currentToken;

   610         unsigned lookahead;

   611         Token lookaheadTokens[maxLookahead];

   612     };

   614     void advance(size_t position);

   615     void tell(Position *);

   616     void seek(const Position &pos);

   617     bool seek(const Position &pos, const TokenStream &other);

   619     size_t positionToOffset(const Position &pos) const {

   620         return pos.buf - userbuf.base();

   621     }

   623     const jschar *rawBase() const {

   624         return userbuf.base();

   625     }

   627     const jschar *rawLimit() const {

   628         return userbuf.limit();

   629     }

   631     bool hasDisplayURL() const {

   632         return displayURL_ != nullptr;

   633     }

   635     jschar *displayURL() {

   636         return displayURL_;

   637     }

   639     bool hasSourceMapURL() const {

   640         return sourceMapURL_ != nullptr;

   641     }

   643     jschar *sourceMapURL() {

   644         return sourceMapURL_;

   645     }

   647     // If the name at s[0:length] is not a keyword in this version, return

   648     // true with *ttp unchanged.

   649     //

   650     // If it is a reserved word in this version and strictness mode, and thus

   651     // can't be present in correct code, report a SyntaxError and return false.

   652     //

   653     // If it is a keyword, like "if", the behavior depends on ttp. If ttp is

   654     // null, report a SyntaxError ("if is a reserved identifier") and return

   655     // false. If ttp is non-null, return true with the keyword's TokenKind in

   656     // *ttp.

   657     bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp);

   659     // This class maps a userbuf offset (which is 0-indexed) to a line number

   660     // (which is 1-indexed) and a column index (which is 0-indexed).

   661     class SourceCoords

   662     {

   663         // For a given buffer holding source code, |lineStartOffsets_| has one

   664         // element per line of source code, plus one sentinel element.  Each

   665         // non-sentinel element holds the buffer offset for the start of the

   666         // corresponding line of source code.  For this example script:

   667         //

   668         // 1  // xyz            [line starts at offset 0]

   669         // 2  var x;            [line starts at offset 7]

   670         // 3                    [line starts at offset 14]

   671         // 4  var y;            [line starts at offset 15]

   672         //

   673         // |lineStartOffsets_| is:

   674         //

   675         //   [0, 7, 14, 15, MAX_PTR]

   676         //

   677         // To convert a "line number" to a "line index" (i.e. an index into

   678         // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's

   679         // line index is (3 - initialLineNum_), which is 2.  Therefore

   680         // lineStartOffsets_[2] holds the buffer offset for the start of line 3,

   681         // which is 14.  (Note that |initialLineNum_| is often 1, but not

   682         // always.)

   683         //

   684         // The first element is always 0, and the last element is always the

   685         // MAX_PTR sentinel.

   686         //

   687         // offset-to-line/column lookups are O(log n) in the worst case (binary

   688         // search), but in practice they're heavily clustered and we do better

   689         // than that by using the previous lookup's result (lastLineIndex_) as

   690         // a starting point.

   691         //

   692         // Checking if an offset lies within a particular line number

   693         // (isOnThisLine()) is O(1).

   694         //

   695         Vector<uint32_t, 128> lineStartOffsets_;

   696         uint32_t            initialLineNum_;

   698         // This is mutable because it's modified on every search, but that fact

   699         // isn't visible outside this class.

   700         mutable uint32_t    lastLineIndex_;

   702         uint32_t lineIndexOf(uint32_t offset) const;

   704         static const uint32_t MAX_PTR = UINT32_MAX;

   706         uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }

   707         uint32_t lineNumToIndex(uint32_t lineNum)   const { return lineNum   - initialLineNum_; }

   709       public:

   710         SourceCoords(ExclusiveContext *cx, uint32_t ln);

   712         void add(uint32_t lineNum, uint32_t lineStartOffset);

   713         bool fill(const SourceCoords &other);

   715         bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {

   716             uint32_t lineIndex = lineNumToIndex(lineNum);

   717             JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length());  // +1 due to sentinel

   718             return lineStartOffsets_[lineIndex] <= offset &&

   719                    offset < lineStartOffsets_[lineIndex + 1];

   720         }

   722         uint32_t lineNum(uint32_t offset) const;

   723         uint32_t columnIndex(uint32_t offset) const;

   724         void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;

   725     };

   727     SourceCoords srcCoords;

   729     JSAtomState &names() const {

   730         return cx->names();

   731     }

   733     ExclusiveContext *context() const {

   734         return cx;

   735     }

   737     const ReadOnlyCompileOptions &options() const {

   738         return options_;

   739     }

   741   private:

   742     // This is the low-level interface to the JS source code buffer.  It just

   743     // gets raw chars, basically.  TokenStream's functions are layered on top

   744     // and do some extra stuff like converting all EOL sequences to '\n',

   745     // tracking the line number, and setting |flags.isEOF|.  (The "raw" in "raw

   746     // chars" refers to the lack of EOL sequence normalization.)

   747     class TokenBuf {

               // A cursor over a jschar buffer, kept as three pointers: base_ (first
               // char), limit_ (one past the last char) and ptr (next char to read).
               // None of the read/step methods below checks ptr against base_ or
               // limit_; callers are expected to test hasRawChars()/atStart() first.

   748       public:

               // Starts the cursor at |buf|; the limit is |buf + length|.  The |cx|
               // argument is not used by this constructor.

   749         TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length)

   750           : base_(buf), limit_(buf + length), ptr(buf)

   751         { }

               // True while the cursor is before the limit.

   753         bool hasRawChars() const {

   754             return ptr < limit_;

   755         }

               // True when the cursor sits at the first char of the buffer.

   757         bool atStart() const {

   758             return ptr == base_;

   759         }

   761         const jschar *base() const {

   762             return base_;

   763         }

   765         const jschar *limit() const {

   766             return limit_;

   767         }

               // Returns the char under the cursor and advances past it.

   769         jschar getRawChar() {

   770             return *ptr++;      // this will nullptr-crash if poisoned

   771         }

               // Returns the char under the cursor without advancing.

   773         jschar peekRawChar() const {

   774             return *ptr;        // this will nullptr-crash if poisoned

   775         }

               // Advances past the char under the cursor only when it equals |c|;
               // the return value tells whether it did.

   777         bool matchRawChar(jschar c) {

   778             if (*ptr == c) {    // this will nullptr-crash if poisoned

   779                 ptr++;

   780                 return true;

   781             }

   782             return false;

   783         }

               // Steps the cursor back by one only when the char just before it
               // equals |c|.  Reads ptr[-1], so the cursor must not be at base_.

   785         bool matchRawCharBackwards(jschar c) {

   786             JS_ASSERT(ptr);     // make sure it hasn't been poisoned

   787             if (*(ptr - 1) == c) {

   788                 ptr--;

   789                 return true;

   790             }

   791             return false;

   792         }

               // Unconditionally steps the cursor back by one char.

   794         void ungetRawChar() {

   795             JS_ASSERT(ptr);     // make sure it hasn't been poisoned

   796             ptr--;

   797         }

               // Returns the cursor itself.  Pass allowPoisoned = true to skip the
               // non-null assertion.

   799         const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {

   800             JS_ASSERT_IF(!allowPoisoned, ptr);     // make sure it hasn't been poisoned

   801             return ptr;

   802         }

   804         // Use this with caution!

               // Overwrites the cursor with |a|.  No check is made that |a| lies
               // within [base_, limit_]; only non-nullness is asserted, and only when
               // allowPoisoned is false.

   805         void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {

   806             JS_ASSERT_IF(!allowPoisoned, a);

   807             ptr = a;

   808         }

   810 #ifdef DEBUG

   811         // Poison the TokenBuf so it cannot be accessed again.

               // Poisoning means setting the cursor to nullptr; poison() exists only
               // when DEBUG is defined.

   812         void poison() {

   813             ptr = nullptr;

   814         }

   815 #endif

               // True for '\n', '\r', LINE_SEPARATOR and PARA_SEPARATOR (the last two
               // are constants defined outside this class).

   817         static bool isRawEOLChar(int32_t c) {

   818             return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;

   819         }

   821         // Finds the next EOL, but stops once 'max' jschars have been scanned

   822         // (*including* the starting jschar).

   823         const jschar *findEOLMax(const jschar *p, size_t max);

   825       private:

   826         const jschar *base_;            // base of buffer

   827         const jschar *limit_;           // limit for quick bounds check

   828         const jschar *ptr;              // next char to get

   829     };

   831     TokenKind getTokenInternal(Modifier modifier);

   833     int32_t getChar();

   834     int32_t getCharIgnoreEOL();

   835     void ungetChar(int32_t c);

   836     void ungetCharIgnoreEOL(int32_t c);

   837     Token *newToken(ptrdiff_t adjust);

   838     bool peekUnicodeEscape(int32_t *c);

   839     bool matchUnicodeEscapeIdStart(int32_t *c);

   840     bool matchUnicodeEscapeIdent(int32_t *c);

   841     bool peekChars(int n, jschar *cp);

   843     bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);

   844     bool getDirective(bool isMultiline, bool shouldWarnDeprecated,

   845                       const char *directive, int directiveLength,

   846                       const char *errorMsgPragma, jschar **destination);

   847     bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);

   848     bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);

   850     // |expect| cannot be an EOL char.

   851     bool matchChar(int32_t expect) {

               // Consumes the next raw char from userbuf only when it equals |expect|
               // and returns whether it did.  hasRawChars() is tested first, so at end
               // of input the result is false and matchRawChar() is never called.

   852         MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));

   853         return MOZ_LIKELY(userbuf.hasRawChars()) &&

   854                userbuf.matchRawChar(expect);

   855     }

   857     void consumeKnownChar(int32_t expect) {

               // Reads one char with getChar() and asserts that it equals |expect|.
               // The char is consumed regardless; the value is kept in a
               // mozilla::DebugOnly and is used only by the assertion.

   858         mozilla::DebugOnly<int32_t> c = getChar();

   859         JS_ASSERT(c == expect);

   860     }

   862     int32_t peekChar() {

               // Returns what getChar() would return next, then hands that same value
               // back via ungetChar().
               // NOTE(review): this relies on ungetChar() undoing everything getChar()
               // did; both are defined outside this header -- confirm there.

   863         int32_t c = getChar();

   864         ungetChar(c);

   865         return c;

   866     }

   868     void skipChars(int n) {

               // Calls getChar() |n| times and discards the results.  The pre-decrement
               // test means no call is made when |n| is zero or negative.

   869         while (--n >= 0)

   870             getChar();

   871     }

   873     void updateLineInfoForEOL();

   874     void updateFlagsForEOL();

   876     // Options used for parsing/tokenizing.

   877     const ReadOnlyCompileOptions &options_;

   879     Token               tokens[ntokens];    // circular token buffer

   880     unsigned            cursor;             // index of last parsed token

   881     unsigned            lookahead;          // count of lookahead tokens

   882     unsigned            lineno;             // current line number

   883     Flags               flags;              // flags -- see above

   884     const jschar        *linebase;          // start of current line;  points into userbuf

   885     const jschar        *prevLinebase;      // start of previous line;  nullptr if on the first line

   886     TokenBuf            userbuf;            // user input buffer; matchChar() reads from it

   887     const char          *filename;          // input filename or null

   888     jschar              *displayURL_;       // the user's requested source URL or null

   889     jschar              *sourceMapURL_;     // source map's filename or null

   890     CharBuffer          tokenbuf;           // current token string buffer

   891     bool                maybeEOL[256];      // probabilistic EOL lookup table

   892     bool                maybeStrSpecial[256];   // speeds up string scanning

   893     uint8_t             isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs? (indexed by TokenKind)

   894     ExclusiveContext    *const cx;          // returned by context(); names() forwards to it

   895     JSPrincipals        *const originPrincipals; // NOTE(review): not used in this part of the header -- check the implementation

   896     StrictModeGetter    *strictModeGetter;  // used to test for strict mode

   897 };

   899 // Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error

   900 // message have const jschar* type, not const char*.

   901 #define JSREPORT_UC 0x100

   903 } // namespace frontend

   904 } // namespace js

   906 extern JS_FRIEND_API(int)

   907 js_fgets(char *buf, int size, FILE *file);

   909 #ifdef DEBUG

   910 extern const char *

   911 TokenKindToString(js::frontend::TokenKind tt);

   912 #endif

   914 #endif /* frontend_TokenStream_h */

The Tor Browser / file revision

js/src/frontend/TokenStream.h@129ffea94266

js/src/frontend/TokenStream.h