js/src/frontend/TokenStream.h

author       Michael Schloh von Bennewitz <michael@schloh.com>
date         Sat, 03 Jan 2015 20:18:00 +0100
branch       TOR_BUG_3246
changeset    7:129ffea94266
permissions  -rw-r--r--

Conditionally enable double key logic according to private browsing mode or
the privacy.thirdparty.isolate preference, and implement it in
GetCookieStringCommon and FindCookie where it counts...
with some reservations about how to convince FindCookie users to test the
condition and pass a nullptr when double key logic is disabled.

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifndef frontend_TokenStream_h
#define frontend_TokenStream_h

// JS lexical scanner interface.

#include "mozilla/DebugOnly.h"
#include "mozilla/PodOperations.h"

#include <stdarg.h>
#include <stddef.h>
#include <stdio.h>

#include "jscntxt.h"
#include "jspubtd.h"

#include "js/Vector.h"
#include "vm/RegExpObject.h"

namespace js {
namespace frontend {

// Values of this type are used to index into arrays such as isExprEnding[],
// so the first value must be zero.
enum TokenKind {
    TOK_ERROR = 0,                 // well-known as the only code < EOF
    TOK_EOF,                       // end of file
    TOK_EOL,                       // end of line; only returned by peekTokenSameLine()
    TOK_SEMI,                      // semicolon
    TOK_COMMA,                     // comma operator
    TOK_HOOK, TOK_COLON,           // conditional (?:)
    TOK_INC, TOK_DEC,              // increment/decrement (++ --)
    TOK_DOT,                       // member operator (.)
    TOK_TRIPLEDOT,                 // for rest arguments (...)
    TOK_LB, TOK_RB,                // left and right brackets
    TOK_LC, TOK_RC,                // left and right curlies (braces)
    TOK_LP, TOK_RP,                // left and right parentheses
    TOK_NAME,                      // identifier
    TOK_NUMBER,                    // numeric constant
    TOK_STRING,                    // string constant
    TOK_REGEXP,                    // RegExp constant
    TOK_TRUE,                      // true
    TOK_FALSE,                     // false
    TOK_NULL,                      // null
    TOK_THIS,                      // this
    TOK_FUNCTION,                  // function keyword
    TOK_IF,                        // if keyword
    TOK_ELSE,                      // else keyword
    TOK_SWITCH,                    // switch keyword
    TOK_CASE,                      // case keyword
    TOK_DEFAULT,                   // default keyword
    TOK_WHILE,                     // while keyword
    TOK_DO,                        // do keyword
    TOK_FOR,                       // for keyword
    TOK_BREAK,                     // break keyword
    TOK_CONTINUE,                  // continue keyword
    TOK_VAR,                       // var keyword
    TOK_CONST,                     // const keyword
    TOK_WITH,                      // with keyword
    TOK_RETURN,                    // return keyword
    TOK_NEW,                       // new keyword
    TOK_DELETE,                    // delete keyword
    TOK_TRY,                       // try keyword
    TOK_CATCH,                     // catch keyword
    TOK_FINALLY,                   // finally keyword
    TOK_THROW,                     // throw keyword
    TOK_DEBUGGER,                  // debugger keyword
    TOK_YIELD,                     // yield from generator function
    TOK_LET,                       // let keyword
    TOK_EXPORT,                    // export keyword
    TOK_IMPORT,                    // import keyword
    TOK_RESERVED,                  // reserved keywords
    TOK_STRICT_RESERVED,           // reserved keywords in strict mode

    // The following token types occupy contiguous ranges to enable easy
    // range-testing.

    // Binary operator tokens, TOK_OR thru TOK_MOD. These must be in the same
    // order as F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h.
    TOK_OR,                        // logical or (||)
    TOK_BINOP_FIRST = TOK_OR,
    TOK_AND,                       // logical and (&&)
    TOK_BITOR,                     // bitwise-or (|)
    TOK_BITXOR,                    // bitwise-xor (^)
    TOK_BITAND,                    // bitwise-and (&)

    // Equality operation tokens, per TokenKindIsEquality.
    TOK_STRICTEQ,
    TOK_EQUALITY_START = TOK_STRICTEQ,
    TOK_EQ,
    TOK_STRICTNE,
    TOK_NE,
    TOK_EQUALITY_LAST = TOK_NE,

    // Relational ops (< <= > >=), per TokenKindIsRelational.
    TOK_LT,
    TOK_RELOP_START = TOK_LT,
    TOK_LE,
    TOK_GT,
    TOK_GE,
    TOK_RELOP_LAST = TOK_GE,

    TOK_INSTANCEOF,                // |instanceof| keyword
    TOK_IN,                        // |in| keyword

    // Shift ops (<< >> >>>), per TokenKindIsShift.
    TOK_LSH,
    TOK_SHIFTOP_START = TOK_LSH,
    TOK_RSH,
    TOK_URSH,
    TOK_SHIFTOP_LAST = TOK_URSH,

    TOK_ADD,
    TOK_SUB,
    TOK_MUL,
    TOK_DIV,
    TOK_MOD,
    TOK_BINOP_LAST = TOK_MOD,

    // Unary operation tokens.
    TOK_TYPEOF,
    TOK_VOID,
    TOK_NOT,
    TOK_BITNOT,

    TOK_ARROW,                     // function arrow (=>)

    // Assignment ops (= += -= etc.), per TokenKindIsAssignment.
    TOK_ASSIGN,
    TOK_ASSIGNMENT_START = TOK_ASSIGN,
    TOK_ADDASSIGN,
    TOK_SUBASSIGN,
    TOK_BITORASSIGN,
    TOK_BITXORASSIGN,
    TOK_BITANDASSIGN,
    TOK_LSHASSIGN,
    TOK_RSHASSIGN,
    TOK_URSHASSIGN,
    TOK_MULASSIGN,
    TOK_DIVASSIGN,
    TOK_MODASSIGN,
    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

    TOK_LIMIT                      // domain size
};

inline bool
TokenKindIsBinaryOp(TokenKind tt)
{
    return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST;
}

inline bool
TokenKindIsEquality(TokenKind tt)
{
    return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST;
}

inline bool
TokenKindIsRelational(TokenKind tt)
{
    return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST;
}

inline bool
TokenKindIsShift(TokenKind tt)
{
    return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST;
}

inline bool
TokenKindIsAssignment(TokenKind tt)
{
    return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST;
}

inline bool
TokenKindIsDecl(TokenKind tt)
{
    return tt == TOK_VAR || tt == TOK_LET;
}
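
// Illustrative sketch, not part of the original header: a hypothetical helper
// (BinaryOpCategory is an invented name) showing how the contiguous TokenKind
// ranges above are meant to be queried.  Because each operator family occupies
// a contiguous range, classification costs only two comparisons per predicate.
inline const char *
BinaryOpCategory(TokenKind tt)
{
    if (TokenKindIsEquality(tt))
        return "equality";
    if (TokenKindIsRelational(tt))
        return "relational";
    if (TokenKindIsShift(tt))
        return "shift";
    if (TokenKindIsBinaryOp(tt))
        return "other binary operator";
    return "not a binary operator";
}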

struct TokenPos {
    uint32_t    begin;  // Offset of the token's first char.
    uint32_t    end;    // Offset of 1 past the token's last char.

    TokenPos() {}
    TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}

    // Return a TokenPos that covers left, right, and anything in between.
    static TokenPos box(const TokenPos &left, const TokenPos &right) {
        JS_ASSERT(left.begin <= left.end);
        JS_ASSERT(left.end <= right.begin);
        JS_ASSERT(right.begin <= right.end);
        return TokenPos(left.begin, right.end);
    }

    bool operator==(const TokenPos& bpos) const {
        return begin == bpos.begin && end == bpos.end;
    }

    bool operator!=(const TokenPos& bpos) const {
        return begin != bpos.begin || end != bpos.end;
    }

    bool operator <(const TokenPos& bpos) const {
        return begin < bpos.begin;
    }

    bool operator <=(const TokenPos& bpos) const {
        return begin <= bpos.begin;
    }

    bool operator >(const TokenPos& bpos) const {
        return !(*this <= bpos);
    }

    bool operator >=(const TokenPos& bpos) const {
        return !(*this < bpos);
    }

    bool encloses(const TokenPos& pos) const {
        return begin <= pos.begin && pos.end <= end;
    }
};
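
// Illustrative sketch, not part of the original header: a hypothetical helper
// (BoxEncloses is an invented name) showing how box() and encloses() compose.
// The position of a parenthesized expression, for example, is the box of the
// '(' and ')' token positions, and it encloses the position of every token in
// between.  box() asserts that |left| ends no later than |right| begins.
inline bool
BoxEncloses(const TokenPos &left, const TokenPos &inner, const TokenPos &right)
{
    return TokenPos::box(left, right).encloses(inner);
}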

enum DecimalPoint { NoDecimal = false, HasDecimal = true };

struct Token
{
    TokenKind           type;           // char value or above enumerator
    TokenPos            pos;            // token position in file
    union {
      private:
        friend struct Token;
        PropertyName    *name;          // non-numeric atom
        JSAtom          *atom;          // potentially-numeric atom
        struct {
            double      value;          // floating point number
            DecimalPoint decimalPoint;  // literal contains '.'
        } number;
        RegExpFlag      reflags;        // regexp flags; use tokenbuf to access
                                        //   regexp chars
    } u;

    // This constructor is necessary only for MSVC 2013 and how it compiles the
    // initialization of TokenStream::tokens.  That field is initialized as
    // tokens() in the constructor init-list.  This *should* zero the entire
    // array, then (because Token has a non-trivial constructor, because
    // TokenPos has a user-provided constructor) call the implicit Token
    // constructor on each element, which would call the TokenPos constructor
    // for Token::pos and do nothing.  (All of which is equivalent to just
    // zeroing TokenStream::tokens.)  But MSVC 2013 (2010/2012 don't have this
    // bug) doesn't zero out each element, so we need this extra constructor to
    // make it do the right thing.  (Token is used primarily by reference or
    // pointer, and it's only initialized a very few places, so having a
    // user-defined constructor won't hurt perf.)  See also bug 920318.
    Token()
      : type(TOK_ERROR),
        pos(0, 0)
    {
    }

    // Mutators

    void setName(PropertyName *name) {
        JS_ASSERT(type == TOK_NAME);
        JS_ASSERT(!IsPoisonedPtr(name));
        u.name = name;
    }

    void setAtom(JSAtom *atom) {
        JS_ASSERT(type == TOK_STRING);
        JS_ASSERT(!IsPoisonedPtr(atom));
        u.atom = atom;
    }

    void setRegExpFlags(js::RegExpFlag flags) {
        JS_ASSERT(type == TOK_REGEXP);
        JS_ASSERT((flags & AllFlags) == flags);
        u.reflags = flags;
    }

    void setNumber(double n, DecimalPoint decimalPoint) {
        JS_ASSERT(type == TOK_NUMBER);
        u.number.value = n;
        u.number.decimalPoint = decimalPoint;
    }

    // Type-safe accessors

    PropertyName *name() const {
        JS_ASSERT(type == TOK_NAME);
        return u.name->asPropertyName(); // poor-man's type verification
    }

    JSAtom *atom() const {
        JS_ASSERT(type == TOK_STRING);
        return u.atom;
    }

    js::RegExpFlag regExpFlags() const {
        JS_ASSERT(type == TOK_REGEXP);
        JS_ASSERT((u.reflags & AllFlags) == u.reflags);
        return u.reflags;
    }

    double number() const {
        JS_ASSERT(type == TOK_NUMBER);
        return u.number.value;
    }

    DecimalPoint decimalPoint() const {
        JS_ASSERT(type == TOK_NUMBER);
        return u.number.decimalPoint;
    }
};
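
// Illustrative sketch, not part of the original header: a hypothetical helper
// (TokenNumericValue is an invented name) showing the mutator/accessor pairing
// on Token's union.  The accessor called must agree with |type|; the
// assertions above enforce that in debug builds.
inline double
TokenNumericValue(const Token &tok)
{
    return tok.type == TOK_NUMBER ? tok.number() : 0.0;
}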

struct CompileError {
    JSErrorReport report;
    char *message;
    ErrorArgumentsType argumentsType;
    CompileError()
      : message(nullptr), argumentsType(ArgumentsAreUnicode)
    {
        mozilla::PodZero(&report);
    }
    ~CompileError();
    void throwError(JSContext *cx);

  private:
    // CompileError owns raw allocated memory, so disable assignment and copying
    // for safety.
    void operator=(const CompileError &) MOZ_DELETE;
    CompileError(const CompileError &) MOZ_DELETE;
};

// Ideally, tokenizing would be entirely independent of context.  But the
// strict mode flag, which is in SharedContext, affects tokenizing, and
// TokenStream needs to see it.
//
// This class is a tiny back-channel from TokenStream to the strict mode flag
// that avoids exposing the rest of SharedContext to TokenStream.
//
class StrictModeGetter {
  public:
    virtual bool strictMode() = 0;
};

// TokenStream is the lexical scanner for JavaScript source text.
//
// It takes a buffer of jschars and linearly scans it into |Token|s.
// Internally the class uses a four element circular buffer |tokens| of
// |Token|s. As an index for |tokens|, the member |cursor| points to the
// current token.
// Calls to getToken() increase |cursor| by one and return the new current
// token. If a TokenStream was just created, the current token is initialized
// with random data (i.e. not initialized). It is therefore important that
// one of the first four member functions listed below is called first.
// The circular buffer lets us go back up to two tokens from the last
// scanned token. Internally, the relative number of backward steps that were
// taken (via ungetToken()) after the last token was scanned is stored in
// |lookahead|.
//
// The following table lists in which situations it is safe to call each listed
// function. No checks are made by the functions in non-debug builds.
//
// Function Name     | Precondition; changes to |lookahead|
// ------------------+---------------------------------------------------------
// getToken          | none; if |lookahead > 0| then |lookahead--|
// peekToken         | none; if |lookahead == 0| then |lookahead == 1|
// peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
// matchToken        | none; if |lookahead > 0| and the match succeeds then
//                   |       |lookahead--|
// consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
// ungetToken        | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
//
// (An illustrative usage sketch follows the class definition below.)
//
// The behavior of the token scanning process (see getTokenInternal()) can be
// modified by calling one of the first four above listed member functions with
// an optional argument of type Modifier.  However, the modifier will be
// ignored unless |lookahead == 0| holds.  Due to constraints of the grammar,
// this turns out not to be a problem in practice. See the
// mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
// for more details:
// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E
//
// The methods seek() and tell() allow rescanning from a previously visited
// location in the buffer.
//
class MOZ_STACK_CLASS TokenStream
{
    // Unicode separators that are treated as line terminators, in addition to \n, \r.
    enum {
        LINE_SEPARATOR = 0x2028,
        PARA_SEPARATOR = 0x2029
    };

    static const size_t ntokens = 4;                // 1 current + 2 lookahead, rounded
                                                    // to power of 2 to avoid divmod by 3
    static const unsigned maxLookahead = 2;
    static const unsigned ntokensMask = ntokens - 1;

  public:
    typedef Vector<jschar, 32> CharBuffer;

    TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
                const jschar *base, size_t length, StrictModeGetter *smg);

    ~TokenStream();

    // Accessors.
    const Token &currentToken() const { return tokens[cursor]; }
    bool isCurrentTokenType(TokenKind type) const {
        return currentToken().type == type;
    }
    const CharBuffer &getTokenbuf() const { return tokenbuf; }
    const char *getFilename() const { return filename; }
    unsigned getLineno() const { return lineno; }
    unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }
    JSPrincipals *getOriginPrincipals() const { return originPrincipals; }
    JSVersion versionNumber() const { return VersionNumber(options().version); }
    JSVersion versionWithFlags() const { return options().version; }

    PropertyName *currentName() const {
        if (isCurrentTokenType(TOK_YIELD))
            return cx->names().yield;
        JS_ASSERT(isCurrentTokenType(TOK_NAME));
        return currentToken().name();
    }

    bool isCurrentTokenAssignment() const {
        return TokenKindIsAssignment(currentToken().type);
    }

    // Flag methods.
    bool isEOF() const { return flags.isEOF; }
    bool sawOctalEscape() const { return flags.sawOctalEscape; }
    bool hadError() const { return flags.hadError; }

    // TokenStream-specific error reporters.
    bool reportError(unsigned errorNumber, ...);
    bool reportWarning(unsigned errorNumber, ...);

    static const uint32_t NoOffset = UINT32_MAX;

    // General-purpose error reporters.  You should avoid calling these
    // directly, and instead use the more succinct alternatives (e.g.
    // reportError()) in TokenStream, Parser, and BytecodeEmitter.
    bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
                                    va_list args);
    bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
                                       va_list args);
    bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber,
                                          va_list args);

    // asm.js reporter
    void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);

  private:
    // These are private because they should only be called by the tokenizer
    // while tokenizing, not by, for example, BytecodeEmitter.
    bool reportStrictModeError(unsigned errorNumber, ...);
    bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }

    void onError();
    static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb);
    bool putIdentInTokenbuf(const jschar *identStart);

    struct Flags
    {
        bool isEOF:1;           // Hit end of file.
        bool isDirtyLine:1;     // Non-whitespace since start of line.
        bool sawOctalEscape:1;  // Saw an octal character escape.
        bool hadError:1;        // Returned TOK_ERROR from getToken.

        Flags()
          : isEOF(), isDirtyLine(), sawOctalEscape(), hadError()
        {}
    };

  public:
    // Sometimes the parser needs to modify how tokens are created.
    enum Modifier
    {
        None,           // Normal operation.
        Operand,        // Looking for an operand, not an operator.  In
                        //   practice, this means that when '/' is seen,
                        //   we look for a regexp instead of just returning
                        //   TOK_DIV.
        KeywordIsName,  // Treat keywords as names by returning TOK_NAME.
    };

    // Get the next token from the stream, make it the current token, and
    // return its kind.
    TokenKind getToken(Modifier modifier = None) {
        // Check for a pushed-back token resulting from mismatching lookahead.
        if (lookahead != 0) {
            lookahead--;
            cursor = (cursor + 1) & ntokensMask;
            TokenKind tt = currentToken().type;
            JS_ASSERT(tt != TOK_EOL);
            return tt;
        }

        return getTokenInternal(modifier);
    }

    // Push the last scanned token back into the stream.
    void ungetToken() {
        JS_ASSERT(lookahead < maxLookahead);
        lookahead++;
        cursor = (cursor - 1) & ntokensMask;
    }

    TokenKind peekToken(Modifier modifier = None) {
        if (lookahead != 0)
            return tokens[(cursor + 1) & ntokensMask].type;
        TokenKind tt = getTokenInternal(modifier);
        ungetToken();
        return tt;
    }

    TokenPos peekTokenPos(Modifier modifier = None) {
        if (lookahead != 0)
            return tokens[(cursor + 1) & ntokensMask].pos;
        getTokenInternal(modifier);
        ungetToken();
        JS_ASSERT(lookahead != 0);
        return tokens[(cursor + 1) & ntokensMask].pos;
    }

    // This is like peekToken(), with one exception:  if there is an EOL
    // between the end of the current token and the start of the next token, it
    // returns TOK_EOL.  In that case, no token with TOK_EOL is actually
    // created, just a TOK_EOL TokenKind is returned, and currentToken()
    // shouldn't be consulted.  (This is the only place TOK_EOL is produced.)
    MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) {
        const Token &curr = currentToken();

        // If lookahead != 0, we have scanned ahead at least one token, and
        // |lineno| is the line that the furthest-scanned token ends on.  If
        // it's the same as the line that the current token ends on, that's a
        // stronger condition than what we are looking for, and we don't need
        // to return TOK_EOL.
        if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno))
            return tokens[(cursor + 1) & ntokensMask].type;

        // The above check misses two cases where we don't have to return
        // TOK_EOL.
        // - The next token starts on the same line, but is a multi-line token.
        // - The next token starts on the same line, but lookahead==2 and there
        //   is a newline between the next token and the one after that.
        // The following test is somewhat expensive but gets these cases (and
        // all others) right.
        (void)getToken(modifier);
        const Token &next = currentToken();
        ungetToken();
        return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)
               ? next.type
               : TOK_EOL;
    }

    // Get the next token from the stream if its kind is |tt|.
    bool matchToken(TokenKind tt, Modifier modifier = None) {
        if (getToken(modifier) == tt)
            return true;
        ungetToken();
        return false;
    }

    void consumeKnownToken(TokenKind tt) {
        JS_ALWAYS_TRUE(matchToken(tt));
    }

    bool matchContextualKeyword(Handle<PropertyName*> keyword) {
        if (getToken() == TOK_NAME && currentToken().name() == keyword)
            return true;
        ungetToken();
        return false;
    }

    bool nextTokenEndsExpr() {
        return isExprEnding[peekToken()];
    }

    class MOZ_STACK_CLASS Position {
      public:
        // The Token fields may contain pointers to atoms, so for correct
        // rooting we must ensure collection of atoms is disabled while objects
        // of this class are live.  Do this by requiring a dummy AutoKeepAtoms
        // reference in the constructor.
        //
        // This class is explicitly ignored by the analysis, so don't add any
        // more pointers to GC things here!
        Position(AutoKeepAtoms&) { }
      private:
        Position(const Position&) MOZ_DELETE;
        friend class TokenStream;
        const jschar *buf;
        Flags flags;
        unsigned lineno;
        const jschar *linebase;
        const jschar *prevLinebase;
        Token currentToken;
        unsigned lookahead;
        Token lookaheadTokens[maxLookahead];
    };

    void advance(size_t position);
    void tell(Position *);
    void seek(const Position &pos);
    bool seek(const Position &pos, const TokenStream &other);

    size_t positionToOffset(const Position &pos) const {
        return pos.buf - userbuf.base();
    }

    const jschar *rawBase() const {
        return userbuf.base();
    }

    const jschar *rawLimit() const {
        return userbuf.limit();
    }

    bool hasDisplayURL() const {
        return displayURL_ != nullptr;
    }

    jschar *displayURL() {
        return displayURL_;
    }

    bool hasSourceMapURL() const {
        return sourceMapURL_ != nullptr;
    }

    jschar *sourceMapURL() {
        return sourceMapURL_;
    }

    // If the name at s[0:length] is not a keyword in this version, return
    // true with *ttp unchanged.
    //
    // If it is a reserved word in this version and strictness mode, and thus
    // can't be present in correct code, report a SyntaxError and return false.
    //
    // If it is a keyword, like "if", the behavior depends on ttp. If ttp is
    // null, report a SyntaxError ("if is a reserved identifier") and return
    // false. If ttp is non-null, return true with the keyword's TokenKind in
    // *ttp.
    bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp);

    // This class maps a userbuf offset (which is 0-indexed) to a line number
    // (which is 1-indexed) and a column index (which is 0-indexed).
    class SourceCoords
    {
        // For a given buffer holding source code, |lineStartOffsets_| has one
        // element per line of source code, plus one sentinel element.  Each
        // non-sentinel element holds the buffer offset for the start of the
        // corresponding line of source code.  For this example script:
        //
        // 1  // xyz            [line starts at offset 0]
        // 2  var x;            [line starts at offset 7]
        // 3                    [line starts at offset 14]
        // 4  var y;            [line starts at offset 15]
        //
        // |lineStartOffsets_| is:
        //
        //   [0, 7, 14, 15, MAX_PTR]
        //
        // To convert a "line number" to a "line index" (i.e. an index into
        // |lineStartOffsets_|), subtract |initialLineNum_|.  E.g. line 3's
        // line index is (3 - initialLineNum_), which is 2.  Therefore
        // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
        // which is 14.  (Note that |initialLineNum_| is often 1, but not
        // always.)
        //
        // The first element is always 0, and the last element is always the
        // MAX_PTR sentinel.
        //
        // offset-to-line/column lookups are O(log n) in the worst case (binary
        // search), but in practice they're heavily clustered and we do better
        // than that by using the previous lookup's result (lastLineIndex_) as
        // a starting point.
        //
        // Checking if an offset lies within a particular line number
        // (isOnThisLine()) is O(1).
        //
        Vector<uint32_t, 128> lineStartOffsets_;
        uint32_t            initialLineNum_;

        // This is mutable because it's modified on every search, but that fact
        // isn't visible outside this class.
        mutable uint32_t    lastLineIndex_;

        uint32_t lineIndexOf(uint32_t offset) const;

        static const uint32_t MAX_PTR = UINT32_MAX;

        uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
        uint32_t lineNumToIndex(uint32_t lineNum)   const { return lineNum   - initialLineNum_; }

      public:
        SourceCoords(ExclusiveContext *cx, uint32_t ln);

        void add(uint32_t lineNum, uint32_t lineStartOffset);
        bool fill(const SourceCoords &other);

        bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {
            uint32_t lineIndex = lineNumToIndex(lineNum);
            JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length());  // +1 due to sentinel
            return lineStartOffsets_[lineIndex] <= offset &&
                   offset < lineStartOffsets_[lineIndex + 1];
        }

        uint32_t lineNum(uint32_t offset) const;
        uint32_t columnIndex(uint32_t offset) const;
        void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;
    };

    SourceCoords srcCoords;

    JSAtomState &names() const {
        return cx->names();
    }

    ExclusiveContext *context() const {
        return cx;
    }

    const ReadOnlyCompileOptions &options() const {
        return options_;
    }

  private:
    // This is the low-level interface to the JS source code buffer.  It just
    // gets raw chars, basically.  TokenStream's functions are layered on top
    // and do some extra stuff like converting all EOL sequences to '\n',
    // tracking the line number, and setting |flags.isEOF|.  (The "raw" in "raw
    // chars" refers to the lack of EOL sequence normalization.)
    class TokenBuf {
      public:
        TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length)
          : base_(buf), limit_(buf + length), ptr(buf)
        { }

        bool hasRawChars() const {
            return ptr < limit_;
        }

        bool atStart() const {
            return ptr == base_;
        }

        const jschar *base() const {
            return base_;
        }

        const jschar *limit() const {
            return limit_;
        }

        jschar getRawChar() {
            return *ptr++;      // this will nullptr-crash if poisoned
        }

        jschar peekRawChar() const {
            return *ptr;        // this will nullptr-crash if poisoned
        }

        bool matchRawChar(jschar c) {
            if (*ptr == c) {    // this will nullptr-crash if poisoned
                ptr++;
                return true;
            }
            return false;
        }

        bool matchRawCharBackwards(jschar c) {
            JS_ASSERT(ptr);     // make sure it hasn't been poisoned
            if (*(ptr - 1) == c) {
                ptr--;
                return true;
            }
            return false;
        }

        void ungetRawChar() {
            JS_ASSERT(ptr);     // make sure it hasn't been poisoned
            ptr--;
        }

        const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {
            JS_ASSERT_IF(!allowPoisoned, ptr);     // make sure it hasn't been poisoned
            return ptr;
        }

        // Use this with caution!
        void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {
            JS_ASSERT_IF(!allowPoisoned, a);
            ptr = a;
        }

#ifdef DEBUG
        // Poison the TokenBuf so it cannot be accessed again.
        void poison() {
            ptr = nullptr;
        }
#endif

        static bool isRawEOLChar(int32_t c) {
            return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;
        }

        // Finds the next EOL, but stops once 'max' jschars have been scanned
        // (*including* the starting jschar).
        const jschar *findEOLMax(const jschar *p, size_t max);

      private:
        const jschar *base_;            // base of buffer
        const jschar *limit_;           // limit for quick bounds check
        const jschar *ptr;              // next char to get
    };

    TokenKind getTokenInternal(Modifier modifier);

    int32_t getChar();
    int32_t getCharIgnoreEOL();
    void ungetChar(int32_t c);
    void ungetCharIgnoreEOL(int32_t c);
    Token *newToken(ptrdiff_t adjust);
    bool peekUnicodeEscape(int32_t *c);
    bool matchUnicodeEscapeIdStart(int32_t *c);
    bool matchUnicodeEscapeIdent(int32_t *c);
    bool peekChars(int n, jschar *cp);

    bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
    bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
                      const char *directive, int directiveLength,
                      const char *errorMsgPragma, jschar **destination);
    bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
    bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);

    // |expect| cannot be an EOL char.
    bool matchChar(int32_t expect) {
        MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
        return MOZ_LIKELY(userbuf.hasRawChars()) &&
               userbuf.matchRawChar(expect);
    }

    void consumeKnownChar(int32_t expect) {
        mozilla::DebugOnly<int32_t> c = getChar();
        JS_ASSERT(c == expect);
    }

    int32_t peekChar() {
        int32_t c = getChar();
        ungetChar(c);
        return c;
    }

    void skipChars(int n) {
        while (--n >= 0)
            getChar();
    }

    void updateLineInfoForEOL();
    void updateFlagsForEOL();

    // Options used for parsing/tokenizing.
    const ReadOnlyCompileOptions &options_;

    Token               tokens[ntokens];    // circular token buffer
    unsigned            cursor;             // index of last parsed token
    unsigned            lookahead;          // count of lookahead tokens
    unsigned            lineno;             // current line number
    Flags               flags;              // flags -- see above
    const jschar        *linebase;          // start of current line;  points into userbuf
    const jschar        *prevLinebase;      // start of previous line;  nullptr if on the first line
    TokenBuf            userbuf;            // user input buffer
    const char          *filename;          // input filename or null
    jschar              *displayURL_;       // the user's requested source URL or null
    jschar              *sourceMapURL_;     // source map's filename or null
    CharBuffer          tokenbuf;           // current token string buffer
    bool                maybeEOL[256];      // probabilistic EOL lookup table
    bool                maybeStrSpecial[256];   // speeds up string scanning
    uint8_t             isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
    ExclusiveContext    *const cx;
    JSPrincipals        *const originPrincipals;
    StrictModeGetter    *strictModeGetter;  // used to test for strict mode
};
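
// Illustrative usage sketch referred to in the comment above the class; it is
// not part of the original header and MatchOptionalSemicolonThenName is an
// invented name.  It shows the typical lookahead protocol: matchToken()
// consumes a token only when it has the expected kind, peekToken() leaves the
// token in the stream (|lookahead| becomes 1), and consumeKnownToken() asserts
// that the expected token really is next.
inline bool
MatchOptionalSemicolonThenName(TokenStream &ts, PropertyName **namep)
{
    // Consume a semicolon if one is next; on a mismatch the token is pushed back.
    (void) ts.matchToken(TOK_SEMI);

    // Peek first so that a non-name token is left for the caller to handle.
    if (ts.peekToken() != TOK_NAME)
        return false;

    ts.consumeKnownToken(TOK_NAME);
    *namep = ts.currentToken().name();
    return true;
}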

// Steal one JSREPORT_* bit (see jsapi.h) to tell that arguments to the error
// message have const jschar* type, not const char*.
#define JSREPORT_UC 0x100

} // namespace frontend
} // namespace js

extern JS_FRIEND_API(int)
js_fgets(char *buf, int size, FILE *file);

#ifdef DEBUG
extern const char *
TokenKindToString(js::frontend::TokenKind tt);
#endif

#endif /* frontend_TokenStream_h */
