Sat, 03 Jan 2015 20:18:00 +0100
Conditionally enable double-key logic when either private browsing mode or
the privacy.thirdparty.isolate preference is active, and implement it in
GetCookieStringCommon and FindCookie, where it counts.
One open reservation: FindCookie callers still have to be convinced to test
the condition themselves and pass nullptr when double-key logic is disabled.
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef frontend_TokenStream_h
8 #define frontend_TokenStream_h
10 // JS lexical scanner interface.
12 #include "mozilla/DebugOnly.h"
13 #include "mozilla/PodOperations.h"
15 #include <stdarg.h>
16 #include <stddef.h>
17 #include <stdio.h>
19 #include "jscntxt.h"
20 #include "jspubtd.h"
22 #include "js/Vector.h"
23 #include "vm/RegExpObject.h"
25 namespace js {
26 namespace frontend {
// Kinds of token the scanner can report.
//
// Enumerator values are also used as indices into per-kind tables such as
// isExprEnding[], so numbering has to start at zero.
enum TokenKind {
    TOK_ERROR = 0,                 // well-known as the only code < EOF
    TOK_EOF,                       // end of file
    TOK_EOL,                       // end of line; only returned by peekTokenSameLine()
    TOK_SEMI,                      // semicolon
    TOK_COMMA,                     // comma operator
    TOK_HOOK,                      // conditional (?)
    TOK_COLON,                     // conditional (:)
    TOK_INC,                       // increment (++)
    TOK_DEC,                       // decrement (--)
    TOK_DOT,                       // member operator (.)
    TOK_TRIPLEDOT,                 // for rest arguments (...)
    TOK_LB,                        // left bracket
    TOK_RB,                        // right bracket
    TOK_LC,                        // left curly (brace)
    TOK_RC,                        // right curly (brace)
    TOK_LP,                        // left parenthesis
    TOK_RP,                        // right parenthesis
    TOK_NAME,                      // identifier
    TOK_NUMBER,                    // numeric constant
    TOK_STRING,                    // string constant
    TOK_REGEXP,                    // RegExp constant
    TOK_TRUE,                      // true
    TOK_FALSE,                     // false
    TOK_NULL,                      // null
    TOK_THIS,                      // this
    TOK_FUNCTION,                  // function keyword
    TOK_IF,                        // if keyword
    TOK_ELSE,                      // else keyword
    TOK_SWITCH,                    // switch keyword
    TOK_CASE,                      // case keyword
    TOK_DEFAULT,                   // default keyword
    TOK_WHILE,                     // while keyword
    TOK_DO,                        // do keyword
    TOK_FOR,                       // for keyword
    TOK_BREAK,                     // break keyword
    TOK_CONTINUE,                  // continue keyword
    TOK_VAR,                       // var keyword
    TOK_CONST,                     // const keyword
    TOK_WITH,                      // with keyword
    TOK_RETURN,                    // return keyword
    TOK_NEW,                       // new keyword
    TOK_DELETE,                    // delete keyword
    TOK_TRY,                       // try keyword
    TOK_CATCH,                     // catch keyword
    TOK_FINALLY,                   // finally keyword
    TOK_THROW,                     // throw keyword
    TOK_DEBUGGER,                  // debugger keyword
    TOK_YIELD,                     // yield from generator function
    TOK_LET,                       // let keyword
    TOK_EXPORT,                    // export keyword
    TOK_IMPORT,                    // import keyword
    TOK_RESERVED,                  // reserved keywords
    TOK_STRICT_RESERVED,           // reserved keywords in strict mode

    // Everything from here on is laid out in contiguous runs so that the
    // TokenKindIs*() predicates can classify a kind with a range test.

    // Binary operators, TOK_OR through TOK_MOD.  The order must match that
    // of F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in ParseNode.h.
    TOK_OR,                        // logical or (||)
    TOK_BINOP_FIRST = TOK_OR,
    TOK_AND,                       // logical and (&&)
    TOK_BITOR,                     // bitwise-or (|)
    TOK_BITXOR,                    // bitwise-xor (^)
    TOK_BITAND,                    // bitwise-and (&)

    // Equality operators; see TokenKindIsEquality.
    TOK_STRICTEQ,
    TOK_EQUALITY_START = TOK_STRICTEQ,
    TOK_EQ,
    TOK_STRICTNE,
    TOK_NE,
    TOK_EQUALITY_LAST = TOK_NE,

    // Relational operators (< <= > >=); see TokenKindIsRelational.
    TOK_LT,
    TOK_RELOP_START = TOK_LT,
    TOK_LE,
    TOK_GT,
    TOK_GE,
    TOK_RELOP_LAST = TOK_GE,

    TOK_INSTANCEOF,                // |instanceof| keyword
    TOK_IN,                        // |in| keyword

    // Shift operators (<< >> >>>); see TokenKindIsShift.
    TOK_LSH,
    TOK_SHIFTOP_START = TOK_LSH,
    TOK_RSH,
    TOK_URSH,
    TOK_SHIFTOP_LAST = TOK_URSH,

    TOK_ADD,
    TOK_SUB,
    TOK_MUL,
    TOK_DIV,
    TOK_MOD,
    TOK_BINOP_LAST = TOK_MOD,

    // Unary operators.
    TOK_TYPEOF,
    TOK_VOID,
    TOK_NOT,
    TOK_BITNOT,

    TOK_ARROW,                     // function arrow (=>)

    // Assignment operators (= += -= etc.); see TokenKindIsAssignment.
    TOK_ASSIGN,
    TOK_ASSIGNMENT_START = TOK_ASSIGN,
    TOK_ADDASSIGN,
    TOK_SUBASSIGN,
    TOK_BITORASSIGN,
    TOK_BITXORASSIGN,
    TOK_BITANDASSIGN,
    TOK_LSHASSIGN,
    TOK_RSHASSIGN,
    TOK_URSHASSIGN,
    TOK_MULASSIGN,
    TOK_DIVASSIGN,
    TOK_MODASSIGN,
    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

    TOK_LIMIT                      // domain size
};
152 inline bool
153 TokenKindIsBinaryOp(TokenKind tt)
154 {
155 return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST;
156 }
158 inline bool
159 TokenKindIsEquality(TokenKind tt)
160 {
161 return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST;
162 }
164 inline bool
165 TokenKindIsRelational(TokenKind tt)
166 {
167 return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST;
168 }
170 inline bool
171 TokenKindIsShift(TokenKind tt)
172 {
173 return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST;
174 }
176 inline bool
177 TokenKindIsAssignment(TokenKind tt)
178 {
179 return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST;
180 }
182 inline bool
183 TokenKindIsDecl(TokenKind tt)
184 {
185 return tt == TOK_VAR || tt == TOK_LET;
186 }
188 struct TokenPos {
189 uint32_t begin; // Offset of the token's first char.
190 uint32_t end; // Offset of 1 past the token's last char.
192 TokenPos() {}
193 TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {}
195 // Return a TokenPos that covers left, right, and anything in between.
196 static TokenPos box(const TokenPos &left, const TokenPos &right) {
197 JS_ASSERT(left.begin <= left.end);
198 JS_ASSERT(left.end <= right.begin);
199 JS_ASSERT(right.begin <= right.end);
200 return TokenPos(left.begin, right.end);
201 }
203 bool operator==(const TokenPos& bpos) const {
204 return begin == bpos.begin && end == bpos.end;
205 }
207 bool operator!=(const TokenPos& bpos) const {
208 return begin != bpos.begin || end != bpos.end;
209 }
211 bool operator <(const TokenPos& bpos) const {
212 return begin < bpos.begin;
213 }
215 bool operator <=(const TokenPos& bpos) const {
216 return begin <= bpos.begin;
217 }
219 bool operator >(const TokenPos& bpos) const {
220 return !(*this <= bpos);
221 }
223 bool operator >=(const TokenPos& bpos) const {
224 return !(*this < bpos);
225 }
227 bool encloses(const TokenPos& pos) const {
228 return begin <= pos.begin && pos.end <= end;
229 }
230 };
// Records whether a numeric literal was written with a '.'.  The enumerators
// carry the values of false and true respectively.
enum DecimalPoint {
    NoDecimal = false,
    HasDecimal = true
};
234 struct Token
235 {
236 TokenKind type; // char value or above enumerator
237 TokenPos pos; // token position in file
238 union {
239 private:
240 friend struct Token;
241 PropertyName *name; // non-numeric atom
242 JSAtom *atom; // potentially-numeric atom
243 struct {
244 double value; // floating point number
245 DecimalPoint decimalPoint; // literal contains '.'
246 } number;
247 RegExpFlag reflags; // regexp flags; use tokenbuf to access
248 // regexp chars
249 } u;
251 // This constructor is necessary only for MSVC 2013 and how it compiles the
252 // initialization of TokenStream::tokens. That field is initialized as
253 // tokens() in the constructor init-list. This *should* zero the entire
254 // array, then (because Token has a non-trivial constructor, because
255 // TokenPos has a user-provided constructor) call the implicit Token
256 // constructor on each element, which would call the TokenPos constructor
257 // for Token::pos and do nothing. (All of which is equivalent to just
258 // zeroing TokenStream::tokens.) But MSVC 2013 (2010/2012 don't have this
259 // bug) doesn't zero out each element, so we need this extra constructor to
260 // make it do the right thing. (Token is used primarily by reference or
261 // pointer, and it's only initialized a very few places, so having a
262 // user-defined constructor won't hurt perf.) See also bug 920318.
263 Token()
264 : type(TOK_ERROR),
265 pos(0, 0)
266 {
267 }
269 // Mutators
271 void setName(PropertyName *name) {
272 JS_ASSERT(type == TOK_NAME);
273 JS_ASSERT(!IsPoisonedPtr(name));
274 u.name = name;
275 }
277 void setAtom(JSAtom *atom) {
278 JS_ASSERT(type == TOK_STRING);
279 JS_ASSERT(!IsPoisonedPtr(atom));
280 u.atom = atom;
281 }
283 void setRegExpFlags(js::RegExpFlag flags) {
284 JS_ASSERT(type == TOK_REGEXP);
285 JS_ASSERT((flags & AllFlags) == flags);
286 u.reflags = flags;
287 }
289 void setNumber(double n, DecimalPoint decimalPoint) {
290 JS_ASSERT(type == TOK_NUMBER);
291 u.number.value = n;
292 u.number.decimalPoint = decimalPoint;
293 }
295 // Type-safe accessors
297 PropertyName *name() const {
298 JS_ASSERT(type == TOK_NAME);
299 return u.name->asPropertyName(); // poor-man's type verification
300 }
302 JSAtom *atom() const {
303 JS_ASSERT(type == TOK_STRING);
304 return u.atom;
305 }
307 js::RegExpFlag regExpFlags() const {
308 JS_ASSERT(type == TOK_REGEXP);
309 JS_ASSERT((u.reflags & AllFlags) == u.reflags);
310 return u.reflags;
311 }
313 double number() const {
314 JS_ASSERT(type == TOK_NUMBER);
315 return u.number.value;
316 }
318 DecimalPoint decimalPoint() const {
319 JS_ASSERT(type == TOK_NUMBER);
320 return u.number.decimalPoint;
321 }
322 };
324 struct CompileError {
325 JSErrorReport report;
326 char *message;
327 ErrorArgumentsType argumentsType;
328 CompileError()
329 : message(nullptr), argumentsType(ArgumentsAreUnicode)
330 {
331 mozilla::PodZero(&report);
332 }
333 ~CompileError();
334 void throwError(JSContext *cx);
336 private:
337 // CompileError owns raw allocated memory, so disable assignment and copying
338 // for safety.
339 void operator=(const CompileError &) MOZ_DELETE;
340 CompileError(const CompileError &) MOZ_DELETE;
341 };
// In an ideal world tokenizing would not depend on context at all.  However,
// the strict mode flag -- which lives in SharedContext -- changes how source is
// tokenized, so TokenStream has to be able to read it.
//
// This interface is the narrow back-channel through which TokenStream reads
// that one flag, without the rest of SharedContext being exposed to it.
//
class StrictModeGetter
{
  public:
    virtual bool strictMode() = 0;
};
355 // TokenStream is the lexical scanner for Javascript source text.
356 //
357 // It takes a buffer of jschars and linearly scans it into |Token|s.
358 // Internally the class uses a four element circular buffer |tokens| of
359 // |Token|s. As an index for |tokens|, the member |cursor| points to the
360 // current token.
361 // Calls to getToken() increase |cursor| by one and return the new current
362 // token. If a TokenStream was just created, the current token is initialized
363 // with random data (i.e. not initialized). It is therefore important that
364 // one of the first four member functions listed below is called first.
365 // The circular buffer lets us go back up to two tokens from the last
366 // scanned token. Internally, the relative number of backward steps that were
367 // taken (via ungetToken()) after the last token was scanned is stored in
368 // |lookahead|.
369 //
370 // The following table lists in which situations it is safe to call each listed
371 // function. No checks are made by the functions in non-debug builds.
372 //
373 // Function Name | Precondition; changes to |lookahead|
374 // ------------------+---------------------------------------------------------
375 // getToken | none; if |lookahead > 0| then |lookahead--|
376 // peekToken | none; if |lookahead == 0| then |lookahead == 1|
377 // peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1|
378 // matchToken | none; if |lookahead > 0| and the match succeeds then
379 // | |lookahead--|
380 // consumeKnownToken | none; if |lookahead > 0| then |lookahead--|
381 // ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++|
382 //
383 // The behavior of the token scanning process (see getTokenInternal()) can be
384 // modified by calling one of the first four above listed member functions with
385 // an optional argument of type Modifier. However, the modifier will be
386 // ignored unless |lookahead == 0| holds. Due to constraints of the grammar,
387 // this turns out not to be a problem in practice. See the
388 // mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?'
389 // for more details:
// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E
//
// The methods seek() and tell() allow rescanning from a previously visited
// location of the buffer.
394 //
395 class MOZ_STACK_CLASS TokenStream
396 {
// Unicode separators that count as line terminators, on top of \n and \r.
enum {
    LINE_SEPARATOR = 0x2028,
    PARA_SEPARATOR = 0x2029
};

// Size of the circular token buffer: 1 current token + 2 lookahead tokens,
// rounded up to a power of 2 so that indices wrap with a mask instead of a
// divmod by 3.
static const size_t ntokens = 4;

// Largest number of tokens that may be pushed back at any one time.
static const unsigned maxLookahead = 2;

// Mask that wraps an index into the range [0, ntokens).
static const unsigned ntokensMask = ntokens - 1;
408 public:
409 typedef Vector<jschar, 32> CharBuffer;
411 TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
412 const jschar *base, size_t length, StrictModeGetter *smg);
414 ~TokenStream();
416 // Accessors.
417 const Token ¤tToken() const { return tokens[cursor]; }
418 bool isCurrentTokenType(TokenKind type) const {
419 return currentToken().type == type;
420 }
421 const CharBuffer &getTokenbuf() const { return tokenbuf; }
422 const char *getFilename() const { return filename; }
423 unsigned getLineno() const { return lineno; }
424 unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; }
425 JSPrincipals *getOriginPrincipals() const { return originPrincipals; }
426 JSVersion versionNumber() const { return VersionNumber(options().version); }
427 JSVersion versionWithFlags() const { return options().version; }
429 PropertyName *currentName() const {
430 if (isCurrentTokenType(TOK_YIELD))
431 return cx->names().yield;
432 JS_ASSERT(isCurrentTokenType(TOK_NAME));
433 return currentToken().name();
434 }
436 bool isCurrentTokenAssignment() const {
437 return TokenKindIsAssignment(currentToken().type);
438 }
440 // Flag methods.
441 bool isEOF() const { return flags.isEOF; }
442 bool sawOctalEscape() const { return flags.sawOctalEscape; }
443 bool hadError() const { return flags.hadError; }
// Error reporters specific to TokenStream.
bool reportError(unsigned errorNumber, ...);
bool reportWarning(unsigned errorNumber, ...);

// NOTE(review): presumably the |offset| value meaning "no source offset
// applies" -- confirm against the reporter definitions.
static const uint32_t NoOffset = UINT32_MAX;

// General-purpose error reporters.  Avoid calling these directly; prefer the
// more succinct alternatives (e.g. reportError()) offered by TokenStream,
// Parser, and BytecodeEmitter.
bool reportCompileErrorNumberVA(uint32_t offset,
                                unsigned flags,
                                unsigned errorNumber,
                                va_list args);
bool reportStrictModeErrorNumberVA(uint32_t offset,
                                   bool strictMode,
                                   unsigned errorNumber,
                                   va_list args);
bool reportStrictWarningErrorNumberVA(uint32_t offset,
                                      unsigned errorNumber,
                                      va_list args);

// Reporter used for asm.js.
void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...);
464 private:
465 // These are private because they should only be called by the tokenizer
466 // while tokenizing not by, for example, BytecodeEmitter.
467 bool reportStrictModeError(unsigned errorNumber, ...);
468 bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); }
470 void onError();
471 static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb);
472 bool putIdentInTokenbuf(const jschar *identStart);
// Scanner status bits, each packed into a single-bit field.  All of them start
// out false.
struct Flags
{
    bool isEOF:1;           // Hit end of file.
    bool isDirtyLine:1;     // Non-whitespace since start of line.
    bool sawOctalEscape:1;  // Saw an octal character escape.
    bool hadError:1;        // Returned TOK_ERROR from getToken.

    Flags()
      : isEOF(false),
        isDirtyLine(false),
        sawOctalEscape(false),
        hadError(false)
    {}
};
486 public:
// Lets the parser adjust how the next token is produced.
enum Modifier
{
    // Normal operation.
    None,

    // An operand is expected rather than an operator.  In practice this
    // means that a '/' starts a regexp instead of yielding TOK_DIV.
    Operand,

    // Keywords are treated as names: TOK_NAME is returned for them.
    KeywordIsName,
};
498 // Get the next token from the stream, make it the current token, and
499 // return its kind.
500 TokenKind getToken(Modifier modifier = None) {
501 // Check for a pushed-back token resulting from mismatching lookahead.
502 if (lookahead != 0) {
503 lookahead--;
504 cursor = (cursor + 1) & ntokensMask;
505 TokenKind tt = currentToken().type;
506 JS_ASSERT(tt != TOK_EOL);
507 return tt;
508 }
510 return getTokenInternal(modifier);
511 }
513 // Push the last scanned token back into the stream.
514 void ungetToken() {
515 JS_ASSERT(lookahead < maxLookahead);
516 lookahead++;
517 cursor = (cursor - 1) & ntokensMask;
518 }
520 TokenKind peekToken(Modifier modifier = None) {
521 if (lookahead != 0)
522 return tokens[(cursor + 1) & ntokensMask].type;
523 TokenKind tt = getTokenInternal(modifier);
524 ungetToken();
525 return tt;
526 }
528 TokenPos peekTokenPos(Modifier modifier = None) {
529 if (lookahead != 0)
530 return tokens[(cursor + 1) & ntokensMask].pos;
531 getTokenInternal(modifier);
532 ungetToken();
533 JS_ASSERT(lookahead != 0);
534 return tokens[(cursor + 1) & ntokensMask].pos;
535 }
537 // This is like peekToken(), with one exception: if there is an EOL
538 // between the end of the current token and the start of the next token, it
539 // returns TOK_EOL. In that case, no token with TOK_EOL is actually
540 // created, just a TOK_EOL TokenKind is returned, and currentToken()
541 // shouldn't be consulted. (This is the only place TOK_EOL is produced.)
542 MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) {
543 const Token &curr = currentToken();
545 // If lookahead != 0, we have scanned ahead at least one token, and
546 // |lineno| is the line that the furthest-scanned token ends on. If
547 // it's the same as the line that the current token ends on, that's a
548 // stronger condition than what we are looking for, and we don't need
549 // to return TOK_EOL.
550 if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno))
551 return tokens[(cursor + 1) & ntokensMask].type;
553 // The above check misses two cases where we don't have to return
554 // TOK_EOL.
555 // - The next token starts on the same line, but is a multi-line token.
556 // - The next token starts on the same line, but lookahead==2 and there
557 // is a newline between the next token and the one after that.
558 // The following test is somewhat expensive but gets these cases (and
559 // all others) right.
560 (void)getToken(modifier);
561 const Token &next = currentToken();
562 ungetToken();
563 return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin)
564 ? next.type
565 : TOK_EOL;
566 }
568 // Get the next token from the stream if its kind is |tt|.
569 bool matchToken(TokenKind tt, Modifier modifier = None) {
570 if (getToken(modifier) == tt)
571 return true;
572 ungetToken();
573 return false;
574 }
576 void consumeKnownToken(TokenKind tt) {
577 JS_ALWAYS_TRUE(matchToken(tt));
578 }
580 bool matchContextualKeyword(Handle<PropertyName*> keyword) {
581 if (getToken() == TOK_NAME && currentToken().name() == keyword)
582 return true;
583 ungetToken();
584 return false;
585 }
587 bool nextTokenEndsExpr() {
588 return isExprEnding[peekToken()];
589 }
591 class MOZ_STACK_CLASS Position {
592 public:
593 // The Token fields may contain pointers to atoms, so for correct
594 // rooting we must ensure collection of atoms is disabled while objects
595 // of this class are live. Do this by requiring a dummy AutoKeepAtoms
596 // reference in the constructor.
597 //
598 // This class is explicity ignored by the analysis, so don't add any
599 // more pointers to GC things here!
600 Position(AutoKeepAtoms&) { }
601 private:
602 Position(const Position&) MOZ_DELETE;
603 friend class TokenStream;
604 const jschar *buf;
605 Flags flags;
606 unsigned lineno;
607 const jschar *linebase;
608 const jschar *prevLinebase;
609 Token currentToken;
610 unsigned lookahead;
611 Token lookaheadTokens[maxLookahead];
612 };
614 void advance(size_t position);
615 void tell(Position *);
616 void seek(const Position &pos);
617 bool seek(const Position &pos, const TokenStream &other);
619 size_t positionToOffset(const Position &pos) const {
620 return pos.buf - userbuf.base();
621 }
623 const jschar *rawBase() const {
624 return userbuf.base();
625 }
627 const jschar *rawLimit() const {
628 return userbuf.limit();
629 }
631 bool hasDisplayURL() const {
632 return displayURL_ != nullptr;
633 }
635 jschar *displayURL() {
636 return displayURL_;
637 }
639 bool hasSourceMapURL() const {
640 return sourceMapURL_ != nullptr;
641 }
643 jschar *sourceMapURL() {
644 return sourceMapURL_;
645 }
647 // If the name at s[0:length] is not a keyword in this version, return
648 // true with *ttp unchanged.
649 //
650 // If it is a reserved word in this version and strictness mode, and thus
651 // can't be present in correct code, report a SyntaxError and return false.
652 //
653 // If it is a keyword, like "if", the behavior depends on ttp. If ttp is
654 // null, report a SyntaxError ("if is a reserved identifier") and return
655 // false. If ttp is non-null, return true with the keyword's TokenKind in
656 // *ttp.
657 bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp);
659 // This class maps a userbuf offset (which is 0-indexed) to a line number
660 // (which is 1-indexed) and a column index (which is 0-indexed).
661 class SourceCoords
662 {
663 // For a given buffer holding source code, |lineStartOffsets_| has one
664 // element per line of source code, plus one sentinel element. Each
665 // non-sentinel element holds the buffer offset for the start of the
666 // corresponding line of source code. For this example script:
667 //
668 // 1 // xyz [line starts at offset 0]
669 // 2 var x; [line starts at offset 7]
670 // 3 [line starts at offset 14]
671 // 4 var y; [line starts at offset 15]
672 //
673 // |lineStartOffsets_| is:
674 //
675 // [0, 7, 14, 15, MAX_PTR]
676 //
677 // To convert a "line number" to a "line index" (i.e. an index into
678 // |lineStartOffsets_|), subtract |initialLineNum_|. E.g. line 3's
679 // line index is (3 - initialLineNum_), which is 2. Therefore
680 // lineStartOffsets_[2] holds the buffer offset for the start of line 3,
681 // which is 14. (Note that |initialLineNum_| is often 1, but not
682 // always.)
683 //
684 // The first element is always 0, and the last element is always the
685 // MAX_PTR sentinel.
686 //
687 // offset-to-line/column lookups are O(log n) in the worst case (binary
688 // search), but in practice they're heavily clustered and we do better
689 // than that by using the previous lookup's result (lastLineIndex_) as
690 // a starting point.
691 //
692 // Checking if an offset lies within a particular line number
693 // (isOnThisLine()) is O(1).
694 //
695 Vector<uint32_t, 128> lineStartOffsets_;
696 uint32_t initialLineNum_;
698 // This is mutable because it's modified on every search, but that fact
699 // isn't visible outside this class.
700 mutable uint32_t lastLineIndex_;
702 uint32_t lineIndexOf(uint32_t offset) const;
704 static const uint32_t MAX_PTR = UINT32_MAX;
706 uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; }
707 uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; }
709 public:
710 SourceCoords(ExclusiveContext *cx, uint32_t ln);
712 void add(uint32_t lineNum, uint32_t lineStartOffset);
713 bool fill(const SourceCoords &other);
715 bool isOnThisLine(uint32_t offset, uint32_t lineNum) const {
716 uint32_t lineIndex = lineNumToIndex(lineNum);
717 JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length()); // +1 due to sentinel
718 return lineStartOffsets_[lineIndex] <= offset &&
719 offset < lineStartOffsets_[lineIndex + 1];
720 }
722 uint32_t lineNum(uint32_t offset) const;
723 uint32_t columnIndex(uint32_t offset) const;
724 void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const;
725 };
727 SourceCoords srcCoords;
729 JSAtomState &names() const {
730 return cx->names();
731 }
733 ExclusiveContext *context() const {
734 return cx;
735 }
737 const ReadOnlyCompileOptions &options() const {
738 return options_;
739 }
741 private:
742 // This is the low-level interface to the JS source code buffer. It just
743 // gets raw chars, basically. TokenStreams functions are layered on top
744 // and do some extra stuff like converting all EOL sequences to '\n',
745 // tracking the line number, and setting |flags.isEOF|. (The "raw" in "raw
746 // chars" refers to the lack of EOL sequence normalization.)
747 class TokenBuf {
748 public:
749 TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length)
750 : base_(buf), limit_(buf + length), ptr(buf)
751 { }
753 bool hasRawChars() const {
754 return ptr < limit_;
755 }
757 bool atStart() const {
758 return ptr == base_;
759 }
761 const jschar *base() const {
762 return base_;
763 }
765 const jschar *limit() const {
766 return limit_;
767 }
769 jschar getRawChar() {
770 return *ptr++; // this will nullptr-crash if poisoned
771 }
773 jschar peekRawChar() const {
774 return *ptr; // this will nullptr-crash if poisoned
775 }
777 bool matchRawChar(jschar c) {
778 if (*ptr == c) { // this will nullptr-crash if poisoned
779 ptr++;
780 return true;
781 }
782 return false;
783 }
785 bool matchRawCharBackwards(jschar c) {
786 JS_ASSERT(ptr); // make sure it hasn't been poisoned
787 if (*(ptr - 1) == c) {
788 ptr--;
789 return true;
790 }
791 return false;
792 }
794 void ungetRawChar() {
795 JS_ASSERT(ptr); // make sure it hasn't been poisoned
796 ptr--;
797 }
799 const jschar *addressOfNextRawChar(bool allowPoisoned = false) const {
800 JS_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned
801 return ptr;
802 }
804 // Use this with caution!
805 void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) {
806 JS_ASSERT_IF(!allowPoisoned, a);
807 ptr = a;
808 }
810 #ifdef DEBUG
811 // Poison the TokenBuf so it cannot be accessed again.
812 void poison() {
813 ptr = nullptr;
814 }
815 #endif
817 static bool isRawEOLChar(int32_t c) {
818 return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR;
819 }
821 // Finds the next EOL, but stops once 'max' jschars have been scanned
822 // (*including* the starting jschar).
823 const jschar *findEOLMax(const jschar *p, size_t max);
825 private:
826 const jschar *base_; // base of buffer
827 const jschar *limit_; // limit for quick bounds check
828 const jschar *ptr; // next char to get
829 };
831 TokenKind getTokenInternal(Modifier modifier);
833 int32_t getChar();
834 int32_t getCharIgnoreEOL();
835 void ungetChar(int32_t c);
836 void ungetCharIgnoreEOL(int32_t c);
837 Token *newToken(ptrdiff_t adjust);
838 bool peekUnicodeEscape(int32_t *c);
839 bool matchUnicodeEscapeIdStart(int32_t *c);
840 bool matchUnicodeEscapeIdent(int32_t *c);
841 bool peekChars(int n, jschar *cp);
843 bool getDirectives(bool isMultiline, bool shouldWarnDeprecated);
844 bool getDirective(bool isMultiline, bool shouldWarnDeprecated,
845 const char *directive, int directiveLength,
846 const char *errorMsgPragma, jschar **destination);
847 bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated);
848 bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated);
850 // |expect| cannot be an EOL char.
851 bool matchChar(int32_t expect) {
852 MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect));
853 return MOZ_LIKELY(userbuf.hasRawChars()) &&
854 userbuf.matchRawChar(expect);
855 }
857 void consumeKnownChar(int32_t expect) {
858 mozilla::DebugOnly<int32_t> c = getChar();
859 JS_ASSERT(c == expect);
860 }
862 int32_t peekChar() {
863 int32_t c = getChar();
864 ungetChar(c);
865 return c;
866 }
868 void skipChars(int n) {
869 while (--n >= 0)
870 getChar();
871 }
873 void updateLineInfoForEOL();
874 void updateFlagsForEOL();
876 // Options used for parsing/tokenizing.
877 const ReadOnlyCompileOptions &options_;
879 Token tokens[ntokens]; // circular token buffer
880 unsigned cursor; // index of last parsed token
881 unsigned lookahead; // count of lookahead tokens
882 unsigned lineno; // current line number
883 Flags flags; // flags -- see above
884 const jschar *linebase; // start of current line; points into userbuf
885 const jschar *prevLinebase; // start of previous line; nullptr if on the first line
886 TokenBuf userbuf; // user input buffer
887 const char *filename; // input filename or null
888 jschar *displayURL_; // the user's requested source URL or null
889 jschar *sourceMapURL_; // source map's filename or null
890 CharBuffer tokenbuf; // current token string buffer
891 bool maybeEOL[256]; // probabilistic EOL lookup table
892 bool maybeStrSpecial[256]; // speeds up string scanning
893 uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs?
894 ExclusiveContext *const cx;
895 JSPrincipals *const originPrincipals;
896 StrictModeGetter *strictModeGetter; // used to test for strict mode
897 };
// One JSREPORT_* bit (see jsapi.h) is borrowed to signal that the arguments to
// the error message are of type const jschar* rather than const char*.
#define JSREPORT_UC 0x100
903 } // namespace frontend
904 } // namespace js
906 extern JS_FRIEND_API(int)
907 js_fgets(char *buf, int size, FILE *file);
909 #ifdef DEBUG
910 extern const char *
911 TokenKindToString(js::frontend::TokenKind tt);
912 #endif
914 #endif /* frontend_TokenStream_h */