|
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- |
|
2 * vim: set ts=8 sts=4 et sw=4 tw=99: |
|
3 * This Source Code Form is subject to the terms of the Mozilla Public |
|
4 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
6 |
|
7 #ifndef frontend_TokenStream_h |
|
8 #define frontend_TokenStream_h |
|
9 |
|
10 // JS lexical scanner interface. |
|
11 |
|
12 #include "mozilla/DebugOnly.h" |
|
13 #include "mozilla/PodOperations.h" |
|
14 |
|
15 #include <stdarg.h> |
|
16 #include <stddef.h> |
|
17 #include <stdio.h> |
|
18 |
|
19 #include "jscntxt.h" |
|
20 #include "jspubtd.h" |
|
21 |
|
22 #include "js/Vector.h" |
|
23 #include "vm/RegExpObject.h" |
|
24 |
|
25 namespace js { |
|
26 namespace frontend { |
|
27 |
|
28 // Values of this type are used to index into arrays such as isExprEnding[], |
|
29 // so the first value must be zero. |
|
enum TokenKind {
    // TOK_ERROR must stay at zero: TokenKind values are used directly as
    // array indices, and it is the only kind that sorts below TOK_EOF.
    TOK_ERROR = 0,
    TOK_EOF,                       // end of file
    TOK_EOL,                       // end of line; only returned by peekTokenSameLine()
    TOK_SEMI,                      // semicolon
    TOK_COMMA,                     // comma operator

    // Conditional operator (?:).
    TOK_HOOK,
    TOK_COLON,

    // Increment/decrement (++ --).
    TOK_INC,
    TOK_DEC,

    TOK_DOT,                       // member operator (.)
    TOK_TRIPLEDOT,                 // for rest arguments (...)

    // Left and right brackets.
    TOK_LB,
    TOK_RB,

    // Left and right curlies (braces).
    TOK_LC,
    TOK_RC,

    // Left and right parentheses.
    TOK_LP,
    TOK_RP,

    TOK_NAME,                      // identifier
    TOK_NUMBER,                    // numeric constant
    TOK_STRING,                    // string constant
    TOK_REGEXP,                    // RegExp constant
    TOK_TRUE,                      // true
    TOK_FALSE,                     // false
    TOK_NULL,                      // null
    TOK_THIS,                      // this
    TOK_FUNCTION,                  // function keyword
    TOK_IF,                        // if keyword
    TOK_ELSE,                      // else keyword
    TOK_SWITCH,                    // switch keyword
    TOK_CASE,                      // case keyword
    TOK_DEFAULT,                   // default keyword
    TOK_WHILE,                     // while keyword
    TOK_DO,                        // do keyword
    TOK_FOR,                       // for keyword
    TOK_BREAK,                     // break keyword
    TOK_CONTINUE,                  // continue keyword
    TOK_VAR,                       // var keyword
    TOK_CONST,                     // const keyword
    TOK_WITH,                      // with keyword
    TOK_RETURN,                    // return keyword
    TOK_NEW,                       // new keyword
    TOK_DELETE,                    // delete keyword
    TOK_TRY,                       // try keyword
    TOK_CATCH,                     // catch keyword
    TOK_FINALLY,                   // finally keyword
    TOK_THROW,                     // throw keyword
    TOK_DEBUGGER,                  // debugger keyword
    TOK_YIELD,                     // yield from generator function
    TOK_LET,                       // let keyword
    TOK_EXPORT,                    // export keyword
    TOK_IMPORT,                    // import keyword
    TOK_RESERVED,                  // reserved keywords
    TOK_STRICT_RESERVED,           // reserved keywords in strict mode

    // Everything from here on is laid out in contiguous ranges so that a
    // token's category can be decided with a simple range test.

    // Binary operator tokens run from TOK_OR through TOK_MOD. Their order
    // must match F(OR) and friends in FOR_EACH_PARSE_NODE_KIND in
    // ParseNode.h.
    TOK_OR,                        // logical or (||)
    TOK_BINOP_FIRST = TOK_OR,
    TOK_AND,                       // logical and (&&)
    TOK_BITOR,                     // bitwise-or (|)
    TOK_BITXOR,                    // bitwise-xor (^)
    TOK_BITAND,                    // bitwise-and (&)

    // Equality operators; see TokenKindIsEquality.
    TOK_STRICTEQ,
    TOK_EQUALITY_START = TOK_STRICTEQ,
    TOK_EQ,
    TOK_STRICTNE,
    TOK_NE,
    TOK_EQUALITY_LAST = TOK_NE,

    // Relational operators (< <= > >=); see TokenKindIsRelational.
    TOK_LT,
    TOK_RELOP_START = TOK_LT,
    TOK_LE,
    TOK_GT,
    TOK_GE,
    TOK_RELOP_LAST = TOK_GE,

    TOK_INSTANCEOF,                // |instanceof| keyword
    TOK_IN,                        // |in| keyword

    // Shift operators (<< >> >>>); see TokenKindIsShift.
    TOK_LSH,
    TOK_SHIFTOP_START = TOK_LSH,
    TOK_RSH,
    TOK_URSH,
    TOK_SHIFTOP_LAST = TOK_URSH,

    TOK_ADD,
    TOK_SUB,
    TOK_MUL,
    TOK_DIV,
    TOK_MOD,
    TOK_BINOP_LAST = TOK_MOD,

    // Unary operator tokens.
    TOK_TYPEOF,
    TOK_VOID,
    TOK_NOT,
    TOK_BITNOT,

    TOK_ARROW,                     // function arrow (=>)

    // Assignment operators (= += -= etc.); see TokenKindIsAssignment.
    TOK_ASSIGN,
    TOK_ASSIGNMENT_START = TOK_ASSIGN,
    TOK_ADDASSIGN,
    TOK_SUBASSIGN,
    TOK_BITORASSIGN,
    TOK_BITXORASSIGN,
    TOK_BITANDASSIGN,
    TOK_LSHASSIGN,
    TOK_RSHASSIGN,
    TOK_URSHASSIGN,
    TOK_MULASSIGN,
    TOK_DIVASSIGN,
    TOK_MODASSIGN,
    TOK_ASSIGNMENT_LAST = TOK_MODASSIGN,

    TOK_LIMIT                      // domain size
};
|
151 |
|
152 inline bool |
|
153 TokenKindIsBinaryOp(TokenKind tt) |
|
154 { |
|
155 return TOK_BINOP_FIRST <= tt && tt <= TOK_BINOP_LAST; |
|
156 } |
|
157 |
|
158 inline bool |
|
159 TokenKindIsEquality(TokenKind tt) |
|
160 { |
|
161 return TOK_EQUALITY_START <= tt && tt <= TOK_EQUALITY_LAST; |
|
162 } |
|
163 |
|
164 inline bool |
|
165 TokenKindIsRelational(TokenKind tt) |
|
166 { |
|
167 return TOK_RELOP_START <= tt && tt <= TOK_RELOP_LAST; |
|
168 } |
|
169 |
|
170 inline bool |
|
171 TokenKindIsShift(TokenKind tt) |
|
172 { |
|
173 return TOK_SHIFTOP_START <= tt && tt <= TOK_SHIFTOP_LAST; |
|
174 } |
|
175 |
|
176 inline bool |
|
177 TokenKindIsAssignment(TokenKind tt) |
|
178 { |
|
179 return TOK_ASSIGNMENT_START <= tt && tt <= TOK_ASSIGNMENT_LAST; |
|
180 } |
|
181 |
|
182 inline bool |
|
183 TokenKindIsDecl(TokenKind tt) |
|
184 { |
|
185 return tt == TOK_VAR || tt == TOK_LET; |
|
186 } |
|
187 |
|
188 struct TokenPos { |
|
189 uint32_t begin; // Offset of the token's first char. |
|
190 uint32_t end; // Offset of 1 past the token's last char. |
|
191 |
|
192 TokenPos() {} |
|
193 TokenPos(uint32_t begin, uint32_t end) : begin(begin), end(end) {} |
|
194 |
|
195 // Return a TokenPos that covers left, right, and anything in between. |
|
196 static TokenPos box(const TokenPos &left, const TokenPos &right) { |
|
197 JS_ASSERT(left.begin <= left.end); |
|
198 JS_ASSERT(left.end <= right.begin); |
|
199 JS_ASSERT(right.begin <= right.end); |
|
200 return TokenPos(left.begin, right.end); |
|
201 } |
|
202 |
|
203 bool operator==(const TokenPos& bpos) const { |
|
204 return begin == bpos.begin && end == bpos.end; |
|
205 } |
|
206 |
|
207 bool operator!=(const TokenPos& bpos) const { |
|
208 return begin != bpos.begin || end != bpos.end; |
|
209 } |
|
210 |
|
211 bool operator <(const TokenPos& bpos) const { |
|
212 return begin < bpos.begin; |
|
213 } |
|
214 |
|
215 bool operator <=(const TokenPos& bpos) const { |
|
216 return begin <= bpos.begin; |
|
217 } |
|
218 |
|
219 bool operator >(const TokenPos& bpos) const { |
|
220 return !(*this <= bpos); |
|
221 } |
|
222 |
|
223 bool operator >=(const TokenPos& bpos) const { |
|
224 return !(*this < bpos); |
|
225 } |
|
226 |
|
227 bool encloses(const TokenPos& pos) const { |
|
228 return begin <= pos.begin && pos.end <= end; |
|
229 } |
|
230 }; |
|
231 |
|
// Whether a numeric literal was written with a '.'; the enumerators carry the
// values of the corresponding booleans.
enum DecimalPoint {
    NoDecimal = false,
    HasDecimal = true
};
|
233 |
|
234 struct Token |
|
235 { |
|
236 TokenKind type; // char value or above enumerator |
|
237 TokenPos pos; // token position in file |
|
238 union { |
|
239 private: |
|
240 friend struct Token; |
|
241 PropertyName *name; // non-numeric atom |
|
242 JSAtom *atom; // potentially-numeric atom |
|
243 struct { |
|
244 double value; // floating point number |
|
245 DecimalPoint decimalPoint; // literal contains '.' |
|
246 } number; |
|
247 RegExpFlag reflags; // regexp flags; use tokenbuf to access |
|
248 // regexp chars |
|
249 } u; |
|
250 |
|
251 // This constructor is necessary only for MSVC 2013 and how it compiles the |
|
252 // initialization of TokenStream::tokens. That field is initialized as |
|
253 // tokens() in the constructor init-list. This *should* zero the entire |
|
254 // array, then (because Token has a non-trivial constructor, because |
|
255 // TokenPos has a user-provided constructor) call the implicit Token |
|
256 // constructor on each element, which would call the TokenPos constructor |
|
257 // for Token::pos and do nothing. (All of which is equivalent to just |
|
258 // zeroing TokenStream::tokens.) But MSVC 2013 (2010/2012 don't have this |
|
259 // bug) doesn't zero out each element, so we need this extra constructor to |
|
260 // make it do the right thing. (Token is used primarily by reference or |
|
261 // pointer, and it's only initialized a very few places, so having a |
|
262 // user-defined constructor won't hurt perf.) See also bug 920318. |
|
263 Token() |
|
264 : type(TOK_ERROR), |
|
265 pos(0, 0) |
|
266 { |
|
267 } |
|
268 |
|
269 // Mutators |
|
270 |
|
271 void setName(PropertyName *name) { |
|
272 JS_ASSERT(type == TOK_NAME); |
|
273 JS_ASSERT(!IsPoisonedPtr(name)); |
|
274 u.name = name; |
|
275 } |
|
276 |
|
277 void setAtom(JSAtom *atom) { |
|
278 JS_ASSERT(type == TOK_STRING); |
|
279 JS_ASSERT(!IsPoisonedPtr(atom)); |
|
280 u.atom = atom; |
|
281 } |
|
282 |
|
283 void setRegExpFlags(js::RegExpFlag flags) { |
|
284 JS_ASSERT(type == TOK_REGEXP); |
|
285 JS_ASSERT((flags & AllFlags) == flags); |
|
286 u.reflags = flags; |
|
287 } |
|
288 |
|
289 void setNumber(double n, DecimalPoint decimalPoint) { |
|
290 JS_ASSERT(type == TOK_NUMBER); |
|
291 u.number.value = n; |
|
292 u.number.decimalPoint = decimalPoint; |
|
293 } |
|
294 |
|
295 // Type-safe accessors |
|
296 |
|
297 PropertyName *name() const { |
|
298 JS_ASSERT(type == TOK_NAME); |
|
299 return u.name->asPropertyName(); // poor-man's type verification |
|
300 } |
|
301 |
|
302 JSAtom *atom() const { |
|
303 JS_ASSERT(type == TOK_STRING); |
|
304 return u.atom; |
|
305 } |
|
306 |
|
307 js::RegExpFlag regExpFlags() const { |
|
308 JS_ASSERT(type == TOK_REGEXP); |
|
309 JS_ASSERT((u.reflags & AllFlags) == u.reflags); |
|
310 return u.reflags; |
|
311 } |
|
312 |
|
313 double number() const { |
|
314 JS_ASSERT(type == TOK_NUMBER); |
|
315 return u.number.value; |
|
316 } |
|
317 |
|
318 DecimalPoint decimalPoint() const { |
|
319 JS_ASSERT(type == TOK_NUMBER); |
|
320 return u.number.decimalPoint; |
|
321 } |
|
322 }; |
|
323 |
|
324 struct CompileError { |
|
325 JSErrorReport report; |
|
326 char *message; |
|
327 ErrorArgumentsType argumentsType; |
|
328 CompileError() |
|
329 : message(nullptr), argumentsType(ArgumentsAreUnicode) |
|
330 { |
|
331 mozilla::PodZero(&report); |
|
332 } |
|
333 ~CompileError(); |
|
334 void throwError(JSContext *cx); |
|
335 |
|
336 private: |
|
337 // CompileError owns raw allocated memory, so disable assignment and copying |
|
338 // for safety. |
|
339 void operator=(const CompileError &) MOZ_DELETE; |
|
340 CompileError(const CompileError &) MOZ_DELETE; |
|
341 }; |
|
342 |
|
// Ideally, tokenizing would be entirely independent of context.  But the
// strict mode flag, which is in SharedContext, affects tokenizing, and
// TokenStream needs to see it.
//
// This class is a tiny back-channel from TokenStream to the strict mode flag
// that avoids exposing the rest of SharedContext to TokenStream.
//
// It is a pure interface: implementers override strictMode(), and users only
// ever hold a pointer or reference to it.
//
class StrictModeGetter {
  public:
    // Returns whether strict mode is currently in effect.
    virtual bool strictMode() = 0;

  protected:
    // Non-virtual and protected on purpose: this interface is not an owning
    // handle, so destroying an implementer through a StrictModeGetter pointer
    // (which would be undefined behavior without a virtual destructor) is
    // rejected at compile time. Derived classes can still be destroyed
    // normally.
    ~StrictModeGetter() {}
};
|
354 |
|
355 // TokenStream is the lexical scanner for Javascript source text. |
|
356 // |
|
357 // It takes a buffer of jschars and linearly scans it into |Token|s. |
|
358 // Internally the class uses a four element circular buffer |tokens| of |
|
359 // |Token|s. As an index for |tokens|, the member |cursor| points to the |
|
360 // current token. |
|
361 // Calls to getToken() increase |cursor| by one and return the new current |
|
362 // token. If a TokenStream was just created, the current token is initialized |
|
363 // with random data (i.e. not initialized). It is therefore important that |
|
364 // one of the first four member functions listed below is called first. |
|
365 // The circular buffer lets us go back up to two tokens from the last |
|
366 // scanned token. Internally, the relative number of backward steps that were |
|
367 // taken (via ungetToken()) after the last token was scanned is stored in |
|
368 // |lookahead|. |
|
369 // |
|
370 // The following table lists in which situations it is safe to call each listed |
|
371 // function. No checks are made by the functions in non-debug builds. |
|
372 // |
|
373 // Function Name | Precondition; changes to |lookahead| |
|
374 // ------------------+--------------------------------------------------------- |
|
375 // getToken | none; if |lookahead > 0| then |lookahead--| |
|
376 // peekToken | none; if |lookahead == 0| then |lookahead == 1| |
|
377 // peekTokenSameLine | none; if |lookahead == 0| then |lookahead == 1| |
|
378 // matchToken | none; if |lookahead > 0| and the match succeeds then |
|
379 // | |lookahead--| |
|
380 // consumeKnownToken | none; if |lookahead > 0| then |lookahead--| |
|
381 // ungetToken | 0 <= |lookahead| <= |maxLookahead - 1|; |lookahead++| |
|
382 // |
|
383 // The behavior of the token scanning process (see getTokenInternal()) can be |
|
384 // modified by calling one of the first four above listed member functions with |
|
385 // an optional argument of type Modifier. However, the modifier will be |
|
386 // ignored unless |lookahead == 0| holds. Due to constraints of the grammar, |
|
387 // this turns out not to be a problem in practice. See the |
|
388 // mozilla.dev.tech.js-engine.internals thread entitled 'Bug in the scanner?' |
|
389 // for more details: |
|
// https://groups.google.com/forum/?fromgroups=#!topic/mozilla.dev.tech.js-engine.internals/2JLH5jRcr7E
|
391 // |
|
392 // The methods seek() and tell() allow to rescan from a previous visited |
|
393 // location of the buffer. |
|
394 // |
|
395 class MOZ_STACK_CLASS TokenStream |
|
396 { |
|
397 // Unicode separators that are treated as line terminators, in addition to \n, \r. |
|
398 enum { |
|
399 LINE_SEPARATOR = 0x2028, |
|
400 PARA_SEPARATOR = 0x2029 |
|
401 }; |
|
402 |
|
403 static const size_t ntokens = 4; // 1 current + 2 lookahead, rounded |
|
404 // to power of 2 to avoid divmod by 3 |
|
405 static const unsigned maxLookahead = 2; |
|
406 static const unsigned ntokensMask = ntokens - 1; |
|
407 |
|
408 public: |
|
409 typedef Vector<jschar, 32> CharBuffer; |
|
410 |
|
411 TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options, |
|
412 const jschar *base, size_t length, StrictModeGetter *smg); |
|
413 |
|
414 ~TokenStream(); |
|
415 |
|
416 // Accessors. |
|
417 const Token ¤tToken() const { return tokens[cursor]; } |
|
418 bool isCurrentTokenType(TokenKind type) const { |
|
419 return currentToken().type == type; |
|
420 } |
|
421 const CharBuffer &getTokenbuf() const { return tokenbuf; } |
|
422 const char *getFilename() const { return filename; } |
|
423 unsigned getLineno() const { return lineno; } |
|
424 unsigned getColumn() const { return userbuf.addressOfNextRawChar() - linebase - 1; } |
|
425 JSPrincipals *getOriginPrincipals() const { return originPrincipals; } |
|
426 JSVersion versionNumber() const { return VersionNumber(options().version); } |
|
427 JSVersion versionWithFlags() const { return options().version; } |
|
428 |
|
429 PropertyName *currentName() const { |
|
430 if (isCurrentTokenType(TOK_YIELD)) |
|
431 return cx->names().yield; |
|
432 JS_ASSERT(isCurrentTokenType(TOK_NAME)); |
|
433 return currentToken().name(); |
|
434 } |
|
435 |
|
436 bool isCurrentTokenAssignment() const { |
|
437 return TokenKindIsAssignment(currentToken().type); |
|
438 } |
|
439 |
|
440 // Flag methods. |
|
441 bool isEOF() const { return flags.isEOF; } |
|
442 bool sawOctalEscape() const { return flags.sawOctalEscape; } |
|
443 bool hadError() const { return flags.hadError; } |
|
444 |
|
445 // TokenStream-specific error reporters. |
|
446 bool reportError(unsigned errorNumber, ...); |
|
447 bool reportWarning(unsigned errorNumber, ...); |
|
448 |
|
449 static const uint32_t NoOffset = UINT32_MAX; |
|
450 |
|
451 // General-purpose error reporters. You should avoid calling these |
|
452 // directly, and instead use the more succinct alternatives (e.g. |
|
453 // reportError()) in TokenStream, Parser, and BytecodeEmitter. |
|
454 bool reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, |
|
455 va_list args); |
|
456 bool reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, |
|
457 va_list args); |
|
458 bool reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, |
|
459 va_list args); |
|
460 |
|
461 // asm.js reporter |
|
462 void reportAsmJSError(uint32_t offset, unsigned errorNumber, ...); |
|
463 |
|
464 private: |
|
465 // These are private because they should only be called by the tokenizer |
|
466 // while tokenizing not by, for example, BytecodeEmitter. |
|
467 bool reportStrictModeError(unsigned errorNumber, ...); |
|
468 bool strictMode() const { return strictModeGetter && strictModeGetter->strictMode(); } |
|
469 |
|
470 void onError(); |
|
471 static JSAtom *atomize(ExclusiveContext *cx, CharBuffer &cb); |
|
472 bool putIdentInTokenbuf(const jschar *identStart); |
|
473 |
|
474 struct Flags |
|
475 { |
|
476 bool isEOF:1; // Hit end of file. |
|
477 bool isDirtyLine:1; // Non-whitespace since start of line. |
|
478 bool sawOctalEscape:1; // Saw an octal character escape. |
|
479 bool hadError:1; // Returned TOK_ERROR from getToken. |
|
480 |
|
481 Flags() |
|
482 : isEOF(), isDirtyLine(), sawOctalEscape(), hadError() |
|
483 {} |
|
484 }; |
|
485 |
|
486 public: |
|
487 // Sometimes the parser needs to modify how tokens are created. |
|
488 enum Modifier |
|
489 { |
|
490 None, // Normal operation. |
|
491 Operand, // Looking for an operand, not an operator. In |
|
492 // practice, this means that when '/' is seen, |
|
493 // we look for a regexp instead of just returning |
|
494 // TOK_DIV. |
|
495 KeywordIsName, // Treat keywords as names by returning TOK_NAME. |
|
496 }; |
|
497 |
|
498 // Get the next token from the stream, make it the current token, and |
|
499 // return its kind. |
|
500 TokenKind getToken(Modifier modifier = None) { |
|
501 // Check for a pushed-back token resulting from mismatching lookahead. |
|
502 if (lookahead != 0) { |
|
503 lookahead--; |
|
504 cursor = (cursor + 1) & ntokensMask; |
|
505 TokenKind tt = currentToken().type; |
|
506 JS_ASSERT(tt != TOK_EOL); |
|
507 return tt; |
|
508 } |
|
509 |
|
510 return getTokenInternal(modifier); |
|
511 } |
|
512 |
|
513 // Push the last scanned token back into the stream. |
|
514 void ungetToken() { |
|
515 JS_ASSERT(lookahead < maxLookahead); |
|
516 lookahead++; |
|
517 cursor = (cursor - 1) & ntokensMask; |
|
518 } |
|
519 |
|
520 TokenKind peekToken(Modifier modifier = None) { |
|
521 if (lookahead != 0) |
|
522 return tokens[(cursor + 1) & ntokensMask].type; |
|
523 TokenKind tt = getTokenInternal(modifier); |
|
524 ungetToken(); |
|
525 return tt; |
|
526 } |
|
527 |
|
528 TokenPos peekTokenPos(Modifier modifier = None) { |
|
529 if (lookahead != 0) |
|
530 return tokens[(cursor + 1) & ntokensMask].pos; |
|
531 getTokenInternal(modifier); |
|
532 ungetToken(); |
|
533 JS_ASSERT(lookahead != 0); |
|
534 return tokens[(cursor + 1) & ntokensMask].pos; |
|
535 } |
|
536 |
|
537 // This is like peekToken(), with one exception: if there is an EOL |
|
538 // between the end of the current token and the start of the next token, it |
|
539 // returns TOK_EOL. In that case, no token with TOK_EOL is actually |
|
540 // created, just a TOK_EOL TokenKind is returned, and currentToken() |
|
541 // shouldn't be consulted. (This is the only place TOK_EOL is produced.) |
|
542 MOZ_ALWAYS_INLINE TokenKind peekTokenSameLine(Modifier modifier = None) { |
|
543 const Token &curr = currentToken(); |
|
544 |
|
545 // If lookahead != 0, we have scanned ahead at least one token, and |
|
546 // |lineno| is the line that the furthest-scanned token ends on. If |
|
547 // it's the same as the line that the current token ends on, that's a |
|
548 // stronger condition than what we are looking for, and we don't need |
|
549 // to return TOK_EOL. |
|
550 if (lookahead != 0 && srcCoords.isOnThisLine(curr.pos.end, lineno)) |
|
551 return tokens[(cursor + 1) & ntokensMask].type; |
|
552 |
|
553 // The above check misses two cases where we don't have to return |
|
554 // TOK_EOL. |
|
555 // - The next token starts on the same line, but is a multi-line token. |
|
556 // - The next token starts on the same line, but lookahead==2 and there |
|
557 // is a newline between the next token and the one after that. |
|
558 // The following test is somewhat expensive but gets these cases (and |
|
559 // all others) right. |
|
560 (void)getToken(modifier); |
|
561 const Token &next = currentToken(); |
|
562 ungetToken(); |
|
563 return srcCoords.lineNum(curr.pos.end) == srcCoords.lineNum(next.pos.begin) |
|
564 ? next.type |
|
565 : TOK_EOL; |
|
566 } |
|
567 |
|
568 // Get the next token from the stream if its kind is |tt|. |
|
569 bool matchToken(TokenKind tt, Modifier modifier = None) { |
|
570 if (getToken(modifier) == tt) |
|
571 return true; |
|
572 ungetToken(); |
|
573 return false; |
|
574 } |
|
575 |
|
576 void consumeKnownToken(TokenKind tt) { |
|
577 JS_ALWAYS_TRUE(matchToken(tt)); |
|
578 } |
|
579 |
|
580 bool matchContextualKeyword(Handle<PropertyName*> keyword) { |
|
581 if (getToken() == TOK_NAME && currentToken().name() == keyword) |
|
582 return true; |
|
583 ungetToken(); |
|
584 return false; |
|
585 } |
|
586 |
|
587 bool nextTokenEndsExpr() { |
|
588 return isExprEnding[peekToken()]; |
|
589 } |
|
590 |
|
591 class MOZ_STACK_CLASS Position { |
|
592 public: |
|
593 // The Token fields may contain pointers to atoms, so for correct |
|
594 // rooting we must ensure collection of atoms is disabled while objects |
|
595 // of this class are live. Do this by requiring a dummy AutoKeepAtoms |
|
596 // reference in the constructor. |
|
597 // |
|
598 // This class is explicity ignored by the analysis, so don't add any |
|
599 // more pointers to GC things here! |
|
600 Position(AutoKeepAtoms&) { } |
|
601 private: |
|
602 Position(const Position&) MOZ_DELETE; |
|
603 friend class TokenStream; |
|
604 const jschar *buf; |
|
605 Flags flags; |
|
606 unsigned lineno; |
|
607 const jschar *linebase; |
|
608 const jschar *prevLinebase; |
|
609 Token currentToken; |
|
610 unsigned lookahead; |
|
611 Token lookaheadTokens[maxLookahead]; |
|
612 }; |
|
613 |
|
614 void advance(size_t position); |
|
615 void tell(Position *); |
|
616 void seek(const Position &pos); |
|
617 bool seek(const Position &pos, const TokenStream &other); |
|
618 |
|
619 size_t positionToOffset(const Position &pos) const { |
|
620 return pos.buf - userbuf.base(); |
|
621 } |
|
622 |
|
623 const jschar *rawBase() const { |
|
624 return userbuf.base(); |
|
625 } |
|
626 |
|
627 const jschar *rawLimit() const { |
|
628 return userbuf.limit(); |
|
629 } |
|
630 |
|
631 bool hasDisplayURL() const { |
|
632 return displayURL_ != nullptr; |
|
633 } |
|
634 |
|
635 jschar *displayURL() { |
|
636 return displayURL_; |
|
637 } |
|
638 |
|
639 bool hasSourceMapURL() const { |
|
640 return sourceMapURL_ != nullptr; |
|
641 } |
|
642 |
|
643 jschar *sourceMapURL() { |
|
644 return sourceMapURL_; |
|
645 } |
|
646 |
|
647 // If the name at s[0:length] is not a keyword in this version, return |
|
648 // true with *ttp unchanged. |
|
649 // |
|
650 // If it is a reserved word in this version and strictness mode, and thus |
|
651 // can't be present in correct code, report a SyntaxError and return false. |
|
652 // |
|
653 // If it is a keyword, like "if", the behavior depends on ttp. If ttp is |
|
654 // null, report a SyntaxError ("if is a reserved identifier") and return |
|
655 // false. If ttp is non-null, return true with the keyword's TokenKind in |
|
656 // *ttp. |
|
657 bool checkForKeyword(const jschar *s, size_t length, TokenKind *ttp); |
|
658 |
|
659 // This class maps a userbuf offset (which is 0-indexed) to a line number |
|
660 // (which is 1-indexed) and a column index (which is 0-indexed). |
|
661 class SourceCoords |
|
662 { |
|
663 // For a given buffer holding source code, |lineStartOffsets_| has one |
|
664 // element per line of source code, plus one sentinel element. Each |
|
665 // non-sentinel element holds the buffer offset for the start of the |
|
666 // corresponding line of source code. For this example script: |
|
667 // |
|
668 // 1 // xyz [line starts at offset 0] |
|
669 // 2 var x; [line starts at offset 7] |
|
670 // 3 [line starts at offset 14] |
|
671 // 4 var y; [line starts at offset 15] |
|
672 // |
|
673 // |lineStartOffsets_| is: |
|
674 // |
|
675 // [0, 7, 14, 15, MAX_PTR] |
|
676 // |
|
677 // To convert a "line number" to a "line index" (i.e. an index into |
|
678 // |lineStartOffsets_|), subtract |initialLineNum_|. E.g. line 3's |
|
679 // line index is (3 - initialLineNum_), which is 2. Therefore |
|
680 // lineStartOffsets_[2] holds the buffer offset for the start of line 3, |
|
681 // which is 14. (Note that |initialLineNum_| is often 1, but not |
|
682 // always.) |
|
683 // |
|
684 // The first element is always 0, and the last element is always the |
|
685 // MAX_PTR sentinel. |
|
686 // |
|
687 // offset-to-line/column lookups are O(log n) in the worst case (binary |
|
688 // search), but in practice they're heavily clustered and we do better |
|
689 // than that by using the previous lookup's result (lastLineIndex_) as |
|
690 // a starting point. |
|
691 // |
|
692 // Checking if an offset lies within a particular line number |
|
693 // (isOnThisLine()) is O(1). |
|
694 // |
|
695 Vector<uint32_t, 128> lineStartOffsets_; |
|
696 uint32_t initialLineNum_; |
|
697 |
|
698 // This is mutable because it's modified on every search, but that fact |
|
699 // isn't visible outside this class. |
|
700 mutable uint32_t lastLineIndex_; |
|
701 |
|
702 uint32_t lineIndexOf(uint32_t offset) const; |
|
703 |
|
704 static const uint32_t MAX_PTR = UINT32_MAX; |
|
705 |
|
706 uint32_t lineIndexToNum(uint32_t lineIndex) const { return lineIndex + initialLineNum_; } |
|
707 uint32_t lineNumToIndex(uint32_t lineNum) const { return lineNum - initialLineNum_; } |
|
708 |
|
709 public: |
|
710 SourceCoords(ExclusiveContext *cx, uint32_t ln); |
|
711 |
|
712 void add(uint32_t lineNum, uint32_t lineStartOffset); |
|
713 bool fill(const SourceCoords &other); |
|
714 |
|
715 bool isOnThisLine(uint32_t offset, uint32_t lineNum) const { |
|
716 uint32_t lineIndex = lineNumToIndex(lineNum); |
|
717 JS_ASSERT(lineIndex + 1 < lineStartOffsets_.length()); // +1 due to sentinel |
|
718 return lineStartOffsets_[lineIndex] <= offset && |
|
719 offset < lineStartOffsets_[lineIndex + 1]; |
|
720 } |
|
721 |
|
722 uint32_t lineNum(uint32_t offset) const; |
|
723 uint32_t columnIndex(uint32_t offset) const; |
|
724 void lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, uint32_t *columnIndex) const; |
|
725 }; |
|
726 |
|
727 SourceCoords srcCoords; |
|
728 |
|
729 JSAtomState &names() const { |
|
730 return cx->names(); |
|
731 } |
|
732 |
|
733 ExclusiveContext *context() const { |
|
734 return cx; |
|
735 } |
|
736 |
|
737 const ReadOnlyCompileOptions &options() const { |
|
738 return options_; |
|
739 } |
|
740 |
|
741 private: |
|
742 // This is the low-level interface to the JS source code buffer. It just |
|
743 // gets raw chars, basically. TokenStreams functions are layered on top |
|
744 // and do some extra stuff like converting all EOL sequences to '\n', |
|
745 // tracking the line number, and setting |flags.isEOF|. (The "raw" in "raw |
|
746 // chars" refers to the lack of EOL sequence normalization.) |
|
747 class TokenBuf { |
|
748 public: |
|
749 TokenBuf(ExclusiveContext *cx, const jschar *buf, size_t length) |
|
750 : base_(buf), limit_(buf + length), ptr(buf) |
|
751 { } |
|
752 |
|
753 bool hasRawChars() const { |
|
754 return ptr < limit_; |
|
755 } |
|
756 |
|
757 bool atStart() const { |
|
758 return ptr == base_; |
|
759 } |
|
760 |
|
761 const jschar *base() const { |
|
762 return base_; |
|
763 } |
|
764 |
|
765 const jschar *limit() const { |
|
766 return limit_; |
|
767 } |
|
768 |
|
769 jschar getRawChar() { |
|
770 return *ptr++; // this will nullptr-crash if poisoned |
|
771 } |
|
772 |
|
773 jschar peekRawChar() const { |
|
774 return *ptr; // this will nullptr-crash if poisoned |
|
775 } |
|
776 |
|
777 bool matchRawChar(jschar c) { |
|
778 if (*ptr == c) { // this will nullptr-crash if poisoned |
|
779 ptr++; |
|
780 return true; |
|
781 } |
|
782 return false; |
|
783 } |
|
784 |
|
785 bool matchRawCharBackwards(jschar c) { |
|
786 JS_ASSERT(ptr); // make sure it hasn't been poisoned |
|
787 if (*(ptr - 1) == c) { |
|
788 ptr--; |
|
789 return true; |
|
790 } |
|
791 return false; |
|
792 } |
|
793 |
|
794 void ungetRawChar() { |
|
795 JS_ASSERT(ptr); // make sure it hasn't been poisoned |
|
796 ptr--; |
|
797 } |
|
798 |
|
799 const jschar *addressOfNextRawChar(bool allowPoisoned = false) const { |
|
800 JS_ASSERT_IF(!allowPoisoned, ptr); // make sure it hasn't been poisoned |
|
801 return ptr; |
|
802 } |
|
803 |
|
804 // Use this with caution! |
|
805 void setAddressOfNextRawChar(const jschar *a, bool allowPoisoned = false) { |
|
806 JS_ASSERT_IF(!allowPoisoned, a); |
|
807 ptr = a; |
|
808 } |
|
809 |
|
810 #ifdef DEBUG |
|
811 // Poison the TokenBuf so it cannot be accessed again. |
|
812 void poison() { |
|
813 ptr = nullptr; |
|
814 } |
|
815 #endif |
|
816 |
|
817 static bool isRawEOLChar(int32_t c) { |
|
818 return c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR; |
|
819 } |
|
820 |
|
821 // Finds the next EOL, but stops once 'max' jschars have been scanned |
|
822 // (*including* the starting jschar). |
|
823 const jschar *findEOLMax(const jschar *p, size_t max); |
|
824 |
|
825 private: |
|
826 const jschar *base_; // base of buffer |
|
827 const jschar *limit_; // limit for quick bounds check |
|
828 const jschar *ptr; // next char to get |
|
829 }; |
|
830 |
|
831 TokenKind getTokenInternal(Modifier modifier); |
|
832 |
|
833 int32_t getChar(); |
|
834 int32_t getCharIgnoreEOL(); |
|
835 void ungetChar(int32_t c); |
|
836 void ungetCharIgnoreEOL(int32_t c); |
|
837 Token *newToken(ptrdiff_t adjust); |
|
838 bool peekUnicodeEscape(int32_t *c); |
|
839 bool matchUnicodeEscapeIdStart(int32_t *c); |
|
840 bool matchUnicodeEscapeIdent(int32_t *c); |
|
841 bool peekChars(int n, jschar *cp); |
|
842 |
|
843 bool getDirectives(bool isMultiline, bool shouldWarnDeprecated); |
|
844 bool getDirective(bool isMultiline, bool shouldWarnDeprecated, |
|
845 const char *directive, int directiveLength, |
|
846 const char *errorMsgPragma, jschar **destination); |
|
847 bool getDisplayURL(bool isMultiline, bool shouldWarnDeprecated); |
|
848 bool getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated); |
|
849 |
|
850 // |expect| cannot be an EOL char. |
|
851 bool matchChar(int32_t expect) { |
|
852 MOZ_ASSERT(!TokenBuf::isRawEOLChar(expect)); |
|
853 return MOZ_LIKELY(userbuf.hasRawChars()) && |
|
854 userbuf.matchRawChar(expect); |
|
855 } |
|
856 |
|
857 void consumeKnownChar(int32_t expect) { |
|
858 mozilla::DebugOnly<int32_t> c = getChar(); |
|
859 JS_ASSERT(c == expect); |
|
860 } |
|
861 |
|
862 int32_t peekChar() { |
|
863 int32_t c = getChar(); |
|
864 ungetChar(c); |
|
865 return c; |
|
866 } |
|
867 |
|
868 void skipChars(int n) { |
|
869 while (--n >= 0) |
|
870 getChar(); |
|
871 } |
|
872 |
|
873 void updateLineInfoForEOL(); |
|
874 void updateFlagsForEOL(); |
|
875 |
|
876 // Options used for parsing/tokenizing. |
|
877 const ReadOnlyCompileOptions &options_; |
|
878 |
|
879 Token tokens[ntokens]; // circular token buffer |
|
880 unsigned cursor; // index of last parsed token |
|
881 unsigned lookahead; // count of lookahead tokens |
|
882 unsigned lineno; // current line number |
|
883 Flags flags; // flags -- see above |
|
884 const jschar *linebase; // start of current line; points into userbuf |
|
885 const jschar *prevLinebase; // start of previous line; nullptr if on the first line |
|
886 TokenBuf userbuf; // user input buffer |
|
887 const char *filename; // input filename or null |
|
888 jschar *displayURL_; // the user's requested source URL or null |
|
889 jschar *sourceMapURL_; // source map's filename or null |
|
890 CharBuffer tokenbuf; // current token string buffer |
|
891 bool maybeEOL[256]; // probabilistic EOL lookup table |
|
892 bool maybeStrSpecial[256]; // speeds up string scanning |
|
893 uint8_t isExprEnding[TOK_LIMIT];// which tokens definitely terminate exprs? |
|
894 ExclusiveContext *const cx; |
|
895 JSPrincipals *const originPrincipals; |
|
896 StrictModeGetter *strictModeGetter; // used to test for strict mode |
|
897 }; |
|
898 |
|
// JSREPORT_UC borrows one of the JSREPORT_* flag bits (see jsapi.h). When it
// is set, the arguments to the error message are of type const jschar*
// rather than const char*.
#define JSREPORT_UC 0x100
|
902 |
|
903 } // namespace frontend |
|
904 } // namespace js |
|
905 |
|
// Declaration only; the definition is not part of this header. The function
// is exported through JS_FRIEND_API and returns an int.
// NOTE(review): the (buf, size, file) signature suggests an fgets-like line
// reader -- confirm the exact contract against the definition.
extern JS_FRIEND_API(int)
js_fgets(char *buf, int size, FILE *file);
|
908 |
|
#ifdef DEBUG
// Declared only in DEBUG builds: takes a TokenKind and returns a C string.
// NOTE(review): presumably the enumerator's printable name -- confirm
// against the definition.
extern const char *
TokenKindToString(js::frontend::TokenKind tt);
#endif
|
913 |
|
914 #endif /* frontend_TokenStream_h */ |