/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 * vim: set ts=8 sts=4 et sw=4 tw=99:
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// JS lexical scanner.

#include "frontend/TokenStream.h"

#include "mozilla/PodOperations.h"

#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#include "jsatom.h"
#include "jscntxt.h"
#include "jsexn.h"
#include "jsnum.h"
#include "jsworkers.h"

#include "frontend/BytecodeCompiler.h"
#include "js/CharacterEncoding.h"
#include "vm/Keywords.h"
#include "vm/StringBuffer.h"

using namespace js;
using namespace js::frontend;
using namespace js::unicode;

using mozilla::Maybe;
using mozilla::PodAssign;
using mozilla::PodCopy;
using mozilla::PodZero;

// One entry of the keyword table consulted by FindKeyword().
struct KeywordInfo {
    const char *chars;          // C string with keyword text
    TokenKind tokentype;        // token kind reported for this keyword
    JSVersion version;          // compared against versionNumber() by
                                // TokenStream::checkForKeyword()
};

// The keyword table: one KeywordInfo per keyword listed by
// FOR_EACH_JAVASCRIPT_KEYWORD. FindKeyword() indexes into it with the indices
// handed back by jsautokw.h, so presumably the two must list the keywords in
// the same order -- confirm against the generator before reordering anything.
static const KeywordInfo keywords[] = {
#define KEYWORD_INFO(keyword, name, type, version) \
    {js_##keyword##_str, type, version},
    FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
#undef KEYWORD_INFO
};

// Returns a KeywordInfo for the specified characters, or nullptr if the string
// is not a keyword.
1.56 +static const KeywordInfo * 1.57 +FindKeyword(const jschar *s, size_t length) 1.58 +{ 1.59 + JS_ASSERT(length != 0); 1.60 + 1.61 + size_t i; 1.62 + const KeywordInfo *kw; 1.63 + const char *chars; 1.64 + 1.65 +#define JSKW_LENGTH() length 1.66 +#define JSKW_AT(column) s[column] 1.67 +#define JSKW_GOT_MATCH(index) i = (index); goto got_match; 1.68 +#define JSKW_TEST_GUESS(index) i = (index); goto test_guess; 1.69 +#define JSKW_NO_MATCH() goto no_match; 1.70 +#include "jsautokw.h" 1.71 +#undef JSKW_NO_MATCH 1.72 +#undef JSKW_TEST_GUESS 1.73 +#undef JSKW_GOT_MATCH 1.74 +#undef JSKW_AT 1.75 +#undef JSKW_LENGTH 1.76 + 1.77 + got_match: 1.78 + return &keywords[i]; 1.79 + 1.80 + test_guess: 1.81 + kw = &keywords[i]; 1.82 + chars = kw->chars; 1.83 + do { 1.84 + if (*s++ != (unsigned char)(*chars++)) 1.85 + goto no_match; 1.86 + } while (--length != 0); 1.87 + return kw; 1.88 + 1.89 + no_match: 1.90 + return nullptr; 1.91 +} 1.92 + 1.93 +bool 1.94 +frontend::IsIdentifier(JSLinearString *str) 1.95 +{ 1.96 + const jschar *chars = str->chars(); 1.97 + size_t length = str->length(); 1.98 + 1.99 + if (length == 0) 1.100 + return false; 1.101 + jschar c = *chars; 1.102 + if (!IsIdentifierStart(c)) 1.103 + return false; 1.104 + const jschar *end = chars + length; 1.105 + while (++chars != end) { 1.106 + c = *chars; 1.107 + if (!IsIdentifierPart(c)) 1.108 + return false; 1.109 + } 1.110 + return true; 1.111 +} 1.112 + 1.113 +bool 1.114 +frontend::IsKeyword(JSLinearString *str) 1.115 +{ 1.116 + return FindKeyword(str->chars(), str->length()) != nullptr; 1.117 +} 1.118 + 1.119 +TokenStream::SourceCoords::SourceCoords(ExclusiveContext *cx, uint32_t ln) 1.120 + : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0) 1.121 +{ 1.122 + // This is actually necessary! Removing it causes compile errors on 1.123 + // GCC and clang. 
You could try declaring this: 1.124 + // 1.125 + // const uint32_t TokenStream::SourceCoords::MAX_PTR; 1.126 + // 1.127 + // which fixes the GCC/clang error, but causes bustage on Windows. Sigh. 1.128 + // 1.129 + uint32_t maxPtr = MAX_PTR; 1.130 + 1.131 + // The first line begins at buffer offset 0. MAX_PTR is the sentinel. The 1.132 + // appends cannot fail because |lineStartOffsets_| has statically-allocated 1.133 + // elements. 1.134 + JS_ASSERT(lineStartOffsets_.capacity() >= 2); 1.135 + (void)lineStartOffsets_.reserve(2); 1.136 + lineStartOffsets_.infallibleAppend(0); 1.137 + lineStartOffsets_.infallibleAppend(maxPtr); 1.138 +} 1.139 + 1.140 +MOZ_ALWAYS_INLINE void 1.141 +TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset) 1.142 +{ 1.143 + uint32_t lineIndex = lineNumToIndex(lineNum); 1.144 + uint32_t sentinelIndex = lineStartOffsets_.length() - 1; 1.145 + 1.146 + JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR); 1.147 + 1.148 + if (lineIndex == sentinelIndex) { 1.149 + // We haven't seen this newline before. Update lineStartOffsets_. 1.150 + // We ignore any failures due to OOM -- because we always have a 1.151 + // sentinel node, it'll just be like the newline wasn't present. I.e. 1.152 + // the line numbers will be wrong, but the code won't crash or anything 1.153 + // like that. 1.154 + lineStartOffsets_[lineIndex] = lineStartOffset; 1.155 + 1.156 + uint32_t maxPtr = MAX_PTR; 1.157 + (void)lineStartOffsets_.append(maxPtr); 1.158 + 1.159 + } else { 1.160 + // We have seen this newline before (and ungot it). Do nothing (other 1.161 + // than checking it hasn't mysteriously changed). 
1.162 + JS_ASSERT(lineStartOffsets_[lineIndex] == lineStartOffset); 1.163 + } 1.164 +} 1.165 + 1.166 +MOZ_ALWAYS_INLINE bool 1.167 +TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other) 1.168 +{ 1.169 + JS_ASSERT(lineStartOffsets_.back() == MAX_PTR); 1.170 + JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR); 1.171 + 1.172 + if (lineStartOffsets_.length() >= other.lineStartOffsets_.length()) 1.173 + return true; 1.174 + 1.175 + uint32_t sentinelIndex = lineStartOffsets_.length() - 1; 1.176 + lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex]; 1.177 + 1.178 + for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) { 1.179 + if (!lineStartOffsets_.append(other.lineStartOffsets_[i])) 1.180 + return false; 1.181 + } 1.182 + return true; 1.183 +} 1.184 + 1.185 +MOZ_ALWAYS_INLINE uint32_t 1.186 +TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const 1.187 +{ 1.188 + uint32_t iMin, iMax, iMid; 1.189 + 1.190 + if (lineStartOffsets_[lastLineIndex_] <= offset) { 1.191 + // If we reach here, offset is on a line the same as or higher than 1.192 + // last time. Check first for the +0, +1, +2 cases, because they 1.193 + // typically cover 85--98% of cases. 1.194 + if (offset < lineStartOffsets_[lastLineIndex_ + 1]) 1.195 + return lastLineIndex_; // lineIndex is same as last time 1.196 + 1.197 + // If we reach here, there must be at least one more entry (plus the 1.198 + // sentinel). Try it. 1.199 + lastLineIndex_++; 1.200 + if (offset < lineStartOffsets_[lastLineIndex_ + 1]) 1.201 + return lastLineIndex_; // lineIndex is one higher than last time 1.202 + 1.203 + // The same logic applies here. 1.204 + lastLineIndex_++; 1.205 + if (offset < lineStartOffsets_[lastLineIndex_ + 1]) { 1.206 + return lastLineIndex_; // lineIndex is two higher than last time 1.207 + } 1.208 + 1.209 + // No luck. Oh well, we have a better-than-default starting point for 1.210 + // the binary search. 
1.211 + iMin = lastLineIndex_ + 1; 1.212 + JS_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel 1.213 + 1.214 + } else { 1.215 + iMin = 0; 1.216 + } 1.217 + 1.218 + // This is a binary search with deferred detection of equality, which was 1.219 + // marginally faster in this case than a standard binary search. 1.220 + // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we 1.221 + // want one before that. 1.222 + iMax = lineStartOffsets_.length() - 2; 1.223 + while (iMax > iMin) { 1.224 + iMid = iMin + (iMax - iMin) / 2; 1.225 + if (offset >= lineStartOffsets_[iMid + 1]) 1.226 + iMin = iMid + 1; // offset is above lineStartOffsets_[iMid] 1.227 + else 1.228 + iMax = iMid; // offset is below or within lineStartOffsets_[iMid] 1.229 + } 1.230 + JS_ASSERT(iMax == iMin); 1.231 + JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]); 1.232 + lastLineIndex_ = iMin; 1.233 + return iMin; 1.234 +} 1.235 + 1.236 +uint32_t 1.237 +TokenStream::SourceCoords::lineNum(uint32_t offset) const 1.238 +{ 1.239 + uint32_t lineIndex = lineIndexOf(offset); 1.240 + return lineIndexToNum(lineIndex); 1.241 +} 1.242 + 1.243 +uint32_t 1.244 +TokenStream::SourceCoords::columnIndex(uint32_t offset) const 1.245 +{ 1.246 + uint32_t lineIndex = lineIndexOf(offset); 1.247 + uint32_t lineStartOffset = lineStartOffsets_[lineIndex]; 1.248 + JS_ASSERT(offset >= lineStartOffset); 1.249 + return offset - lineStartOffset; 1.250 +} 1.251 + 1.252 +void 1.253 +TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum, 1.254 + uint32_t *columnIndex) const 1.255 +{ 1.256 + uint32_t lineIndex = lineIndexOf(offset); 1.257 + *lineNum = lineIndexToNum(lineIndex); 1.258 + uint32_t lineStartOffset = lineStartOffsets_[lineIndex]; 1.259 + JS_ASSERT(offset >= lineStartOffset); 1.260 + *columnIndex = offset - lineStartOffset; 1.261 +} 1.262 + 1.263 +#ifdef _MSC_VER 1.264 +#pragma warning(push) 1.265 +#pragma 
warning(disable:4351) 1.266 +#endif 1.267 + 1.268 +// Initialize members that aren't initialized in |init|. 1.269 +TokenStream::TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options, 1.270 + const jschar *base, size_t length, StrictModeGetter *smg) 1.271 + : srcCoords(cx, options.lineno), 1.272 + options_(options), 1.273 + tokens(), 1.274 + cursor(), 1.275 + lookahead(), 1.276 + lineno(options.lineno), 1.277 + flags(), 1.278 + linebase(base - options.column), 1.279 + prevLinebase(nullptr), 1.280 + userbuf(cx, base - options.column, length + options.column), // See comment below 1.281 + filename(options.filename()), 1.282 + displayURL_(nullptr), 1.283 + sourceMapURL_(nullptr), 1.284 + tokenbuf(cx), 1.285 + cx(cx), 1.286 + originPrincipals(options.originPrincipals(cx)), 1.287 + strictModeGetter(smg) 1.288 +{ 1.289 + // The caller must ensure that a reference is held on the supplied principals 1.290 + // throughout compilation. 1.291 + JS_ASSERT_IF(originPrincipals, originPrincipals->refcount > 0); 1.292 + 1.293 + // Column numbers are computed as offsets from the current line's base, so the 1.294 + // initial line's base must be included in the buffer. linebase and userbuf 1.295 + // were adjusted above, and if we are starting tokenization part way through 1.296 + // this line then adjust the next character. 1.297 + userbuf.setAddressOfNextRawChar(base); 1.298 + 1.299 + // Nb: the following tables could be static, but initializing them here is 1.300 + // much easier. Don't worry, the time to initialize them for each 1.301 + // TokenStream is trivial. See bug 639420. 1.302 + 1.303 + // See getChar() for an explanation of maybeEOL[]. 
1.304 + memset(maybeEOL, 0, sizeof(maybeEOL)); 1.305 + maybeEOL[unsigned('\n')] = true; 1.306 + maybeEOL[unsigned('\r')] = true; 1.307 + maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true; 1.308 + maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true; 1.309 + 1.310 + // See getTokenInternal() for an explanation of maybeStrSpecial[]. 1.311 + memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial)); 1.312 + maybeStrSpecial[unsigned('"')] = true; 1.313 + maybeStrSpecial[unsigned('\'')] = true; 1.314 + maybeStrSpecial[unsigned('\\')] = true; 1.315 + maybeStrSpecial[unsigned('\n')] = true; 1.316 + maybeStrSpecial[unsigned('\r')] = true; 1.317 + maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true; 1.318 + maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true; 1.319 + maybeStrSpecial[unsigned(EOF & 0xff)] = true; 1.320 + 1.321 + // See Parser::assignExpr() for an explanation of isExprEnding[]. 1.322 + memset(isExprEnding, 0, sizeof(isExprEnding)); 1.323 + isExprEnding[TOK_COMMA] = 1; 1.324 + isExprEnding[TOK_SEMI] = 1; 1.325 + isExprEnding[TOK_COLON] = 1; 1.326 + isExprEnding[TOK_RP] = 1; 1.327 + isExprEnding[TOK_RB] = 1; 1.328 + isExprEnding[TOK_RC] = 1; 1.329 +} 1.330 + 1.331 +#ifdef _MSC_VER 1.332 +#pragma warning(pop) 1.333 +#endif 1.334 + 1.335 +TokenStream::~TokenStream() 1.336 +{ 1.337 + js_free(displayURL_); 1.338 + js_free(sourceMapURL_); 1.339 + 1.340 + JS_ASSERT_IF(originPrincipals, originPrincipals->refcount); 1.341 +} 1.342 + 1.343 +// Use the fastest available getc. 
1.344 +#if defined(HAVE_GETC_UNLOCKED) 1.345 +# define fast_getc getc_unlocked 1.346 +#elif defined(HAVE__GETC_NOLOCK) 1.347 +# define fast_getc _getc_nolock 1.348 +#else 1.349 +# define fast_getc getc 1.350 +#endif 1.351 + 1.352 +MOZ_ALWAYS_INLINE void 1.353 +TokenStream::updateLineInfoForEOL() 1.354 +{ 1.355 + prevLinebase = linebase; 1.356 + linebase = userbuf.addressOfNextRawChar(); 1.357 + lineno++; 1.358 + srcCoords.add(lineno, linebase - userbuf.base()); 1.359 +} 1.360 + 1.361 +MOZ_ALWAYS_INLINE void 1.362 +TokenStream::updateFlagsForEOL() 1.363 +{ 1.364 + flags.isDirtyLine = false; 1.365 +} 1.366 + 1.367 +// This gets the next char, normalizing all EOL sequences to '\n' as it goes. 1.368 +int32_t 1.369 +TokenStream::getChar() 1.370 +{ 1.371 + int32_t c; 1.372 + if (MOZ_LIKELY(userbuf.hasRawChars())) { 1.373 + c = userbuf.getRawChar(); 1.374 + 1.375 + // Normalize the jschar if it was a newline. We need to detect any of 1.376 + // these four characters: '\n' (0x000a), '\r' (0x000d), 1.377 + // LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each 1.378 + // one in turn is slow, so we use a single probabilistic check, and if 1.379 + // that succeeds, test for them individually. 1.380 + // 1.381 + // We use the bottom 8 bits to index into a lookup table, succeeding 1.382 + // when d&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which 1.383 + // are by the far the most common) this gives false positives for '(' 1.384 + // (0x0028) and ')' (0x0029). We could avoid those by incorporating 1.385 + // the 13th bit of d into the lookup, but that requires extra shifting 1.386 + // and masking and isn't worthwhile. See TokenStream::TokenStream() 1.387 + // for the initialization of the relevant entries in the table. 1.388 + if (MOZ_UNLIKELY(maybeEOL[c & 0xff])) { 1.389 + if (c == '\n') 1.390 + goto eol; 1.391 + if (c == '\r') { 1.392 + // If it's a \r\n sequence: treat as a single EOL, skip over the \n. 
1.393 + if (userbuf.hasRawChars()) 1.394 + userbuf.matchRawChar('\n'); 1.395 + goto eol; 1.396 + } 1.397 + if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) 1.398 + goto eol; 1.399 + } 1.400 + return c; 1.401 + } 1.402 + 1.403 + flags.isEOF = true; 1.404 + return EOF; 1.405 + 1.406 + eol: 1.407 + updateLineInfoForEOL(); 1.408 + return '\n'; 1.409 +} 1.410 + 1.411 +// This gets the next char. It does nothing special with EOL sequences, not 1.412 +// even updating the line counters. It can be used safely if (a) the 1.413 +// resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if 1.414 +// it's an EOL, and (b) the line-related state (lineno, linebase) is not used 1.415 +// before it's ungotten. 1.416 +int32_t 1.417 +TokenStream::getCharIgnoreEOL() 1.418 +{ 1.419 + if (MOZ_LIKELY(userbuf.hasRawChars())) 1.420 + return userbuf.getRawChar(); 1.421 + 1.422 + flags.isEOF = true; 1.423 + return EOF; 1.424 +} 1.425 + 1.426 +void 1.427 +TokenStream::ungetChar(int32_t c) 1.428 +{ 1.429 + if (c == EOF) 1.430 + return; 1.431 + JS_ASSERT(!userbuf.atStart()); 1.432 + userbuf.ungetRawChar(); 1.433 + if (c == '\n') { 1.434 +#ifdef DEBUG 1.435 + int32_t c2 = userbuf.peekRawChar(); 1.436 + JS_ASSERT(TokenBuf::isRawEOLChar(c2)); 1.437 +#endif 1.438 + 1.439 + // If it's a \r\n sequence, also unget the \r. 
1.440 + if (!userbuf.atStart()) 1.441 + userbuf.matchRawCharBackwards('\r'); 1.442 + 1.443 + JS_ASSERT(prevLinebase); // we should never get more than one EOL char 1.444 + linebase = prevLinebase; 1.445 + prevLinebase = nullptr; 1.446 + lineno--; 1.447 + } else { 1.448 + JS_ASSERT(userbuf.peekRawChar() == c); 1.449 + } 1.450 +} 1.451 + 1.452 +void 1.453 +TokenStream::ungetCharIgnoreEOL(int32_t c) 1.454 +{ 1.455 + if (c == EOF) 1.456 + return; 1.457 + JS_ASSERT(!userbuf.atStart()); 1.458 + userbuf.ungetRawChar(); 1.459 +} 1.460 + 1.461 +// Return true iff |n| raw characters can be read from this without reading past 1.462 +// EOF or a newline, and copy those characters into |cp| if so. The characters 1.463 +// are not consumed: use skipChars(n) to do so after checking that the consumed 1.464 +// characters had appropriate values. 1.465 +bool 1.466 +TokenStream::peekChars(int n, jschar *cp) 1.467 +{ 1.468 + int i, j; 1.469 + int32_t c; 1.470 + 1.471 + for (i = 0; i < n; i++) { 1.472 + c = getCharIgnoreEOL(); 1.473 + if (c == EOF) 1.474 + break; 1.475 + if (c == '\n') { 1.476 + ungetCharIgnoreEOL(c); 1.477 + break; 1.478 + } 1.479 + cp[i] = jschar(c); 1.480 + } 1.481 + for (j = i - 1; j >= 0; j--) 1.482 + ungetCharIgnoreEOL(cp[j]); 1.483 + return i == n; 1.484 +} 1.485 + 1.486 +const jschar * 1.487 +TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max) 1.488 +{ 1.489 + JS_ASSERT(base_ <= p && p <= limit_); 1.490 + 1.491 + size_t n = 0; 1.492 + while (true) { 1.493 + if (p >= limit_) 1.494 + break; 1.495 + if (n >= max) 1.496 + break; 1.497 + if (TokenBuf::isRawEOLChar(*p++)) 1.498 + break; 1.499 + n++; 1.500 + } 1.501 + return p; 1.502 +} 1.503 + 1.504 +void 1.505 +TokenStream::advance(size_t position) 1.506 +{ 1.507 + const jschar *end = userbuf.base() + position; 1.508 + while (userbuf.addressOfNextRawChar() < end) 1.509 + getChar(); 1.510 + 1.511 + Token *cur = &tokens[cursor]; 1.512 + cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base(); 1.513 
+ cur->type = TOK_ERROR; 1.514 + lookahead = 0; 1.515 +} 1.516 + 1.517 +void 1.518 +TokenStream::tell(Position *pos) 1.519 +{ 1.520 + pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true); 1.521 + pos->flags = flags; 1.522 + pos->lineno = lineno; 1.523 + pos->linebase = linebase; 1.524 + pos->prevLinebase = prevLinebase; 1.525 + pos->lookahead = lookahead; 1.526 + pos->currentToken = currentToken(); 1.527 + for (unsigned i = 0; i < lookahead; i++) 1.528 + pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask]; 1.529 +} 1.530 + 1.531 +void 1.532 +TokenStream::seek(const Position &pos) 1.533 +{ 1.534 + userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true); 1.535 + flags = pos.flags; 1.536 + lineno = pos.lineno; 1.537 + linebase = pos.linebase; 1.538 + prevLinebase = pos.prevLinebase; 1.539 + lookahead = pos.lookahead; 1.540 + 1.541 + tokens[cursor] = pos.currentToken; 1.542 + for (unsigned i = 0; i < lookahead; i++) 1.543 + tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i]; 1.544 +} 1.545 + 1.546 +bool 1.547 +TokenStream::seek(const Position &pos, const TokenStream &other) 1.548 +{ 1.549 + if (!srcCoords.fill(other.srcCoords)) 1.550 + return false; 1.551 + seek(pos); 1.552 + return true; 1.553 +} 1.554 + 1.555 +bool 1.556 +TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber, 1.557 + va_list args) 1.558 +{ 1.559 + // In strict mode code, this is an error, not merely a warning. 
1.560 + unsigned flags = JSREPORT_STRICT; 1.561 + if (strictMode) 1.562 + flags |= JSREPORT_ERROR; 1.563 + else if (options().extraWarningsOption) 1.564 + flags |= JSREPORT_WARNING; 1.565 + else 1.566 + return true; 1.567 + 1.568 + return reportCompileErrorNumberVA(offset, flags, errorNumber, args); 1.569 +} 1.570 + 1.571 +void 1.572 +CompileError::throwError(JSContext *cx) 1.573 +{ 1.574 + // If there's a runtime exception type associated with this error 1.575 + // number, set that as the pending exception. For errors occuring at 1.576 + // compile time, this is very likely to be a JSEXN_SYNTAXERR. 1.577 + // 1.578 + // If an exception is thrown but not caught, the JSREPORT_EXCEPTION 1.579 + // flag will be set in report.flags. Proper behavior for an error 1.580 + // reporter is to ignore a report with this flag for all but top-level 1.581 + // compilation errors. The exception will remain pending, and so long 1.582 + // as the non-top-level "load", "eval", or "compile" native function 1.583 + // returns false, the top-level reporter will eventually receive the 1.584 + // uncaught exception report. 
1.585 + if (!js_ErrorToException(cx, message, &report, nullptr, nullptr)) 1.586 + CallErrorReporter(cx, message, &report); 1.587 +} 1.588 + 1.589 +CompileError::~CompileError() 1.590 +{ 1.591 + js_free((void*)report.uclinebuf); 1.592 + js_free((void*)report.linebuf); 1.593 + js_free((void*)report.ucmessage); 1.594 + js_free(message); 1.595 + message = nullptr; 1.596 + 1.597 + if (report.messageArgs) { 1.598 + if (argumentsType == ArgumentsAreASCII) { 1.599 + unsigned i = 0; 1.600 + while (report.messageArgs[i]) 1.601 + js_free((void*)report.messageArgs[i++]); 1.602 + } 1.603 + js_free(report.messageArgs); 1.604 + } 1.605 + 1.606 + PodZero(&report); 1.607 +} 1.608 + 1.609 +bool 1.610 +TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber, 1.611 + va_list args) 1.612 +{ 1.613 + bool warning = JSREPORT_IS_WARNING(flags); 1.614 + 1.615 + if (warning && options().werrorOption) { 1.616 + flags &= ~JSREPORT_WARNING; 1.617 + warning = false; 1.618 + } 1.619 + 1.620 + // On the main thread, report the error immediately. When compiling off 1.621 + // thread, save the error so that the main thread can report it later. 1.622 + CompileError tempErr; 1.623 + CompileError &err = cx->isJSContext() ? tempErr : cx->addPendingCompileError(); 1.624 + 1.625 + err.report.flags = flags; 1.626 + err.report.errorNumber = errorNumber; 1.627 + err.report.filename = filename; 1.628 + err.report.originPrincipals = originPrincipals; 1.629 + if (offset == NoOffset) { 1.630 + err.report.lineno = 0; 1.631 + err.report.column = 0; 1.632 + } else { 1.633 + err.report.lineno = srcCoords.lineNum(offset); 1.634 + err.report.column = srcCoords.columnIndex(offset); 1.635 + } 1.636 + 1.637 + err.argumentsType = (flags & JSREPORT_UC) ? 
ArgumentsAreUnicode : ArgumentsAreASCII; 1.638 + 1.639 + if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, nullptr, errorNumber, &err.message, 1.640 + &err.report, err.argumentsType, args)) 1.641 + { 1.642 + return false; 1.643 + } 1.644 + 1.645 + // Given a token, T, that we want to complain about: if T's (starting) 1.646 + // lineno doesn't match TokenStream's lineno, that means we've scanned past 1.647 + // the line that T starts on, which makes it hard to print some or all of 1.648 + // T's (starting) line for context. 1.649 + // 1.650 + // So we don't even try, leaving report.linebuf and friends zeroed. This 1.651 + // means that any error involving a multi-line token (e.g. an unterminated 1.652 + // multi-line string literal) won't have a context printed. 1.653 + if (offset != NoOffset && err.report.lineno == lineno) { 1.654 + const jschar *tokenStart = userbuf.base() + offset; 1.655 + 1.656 + // We show only a portion (a "window") of the line around the erroneous 1.657 + // token -- the first char in the token, plus |windowRadius| chars 1.658 + // before it and |windowRadius - 1| chars after it. This is because 1.659 + // lines can be very long and printing the whole line is (a) not that 1.660 + // helpful, and (b) can waste a lot of memory. See bug 634444. 1.661 + static const size_t windowRadius = 60; 1.662 + 1.663 + // Truncate at the front if necessary. 1.664 + const jschar *windowBase = (linebase + windowRadius < tokenStart) 1.665 + ? tokenStart - windowRadius 1.666 + : linebase; 1.667 + uint32_t windowOffset = tokenStart - windowBase; 1.668 + 1.669 + // Find EOL, or truncate at the back if necessary. 1.670 + const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius); 1.671 + size_t windowLength = windowLimit - windowBase; 1.672 + JS_ASSERT(windowLength <= windowRadius * 2); 1.673 + 1.674 + // Create the windowed strings. 
1.675 + StringBuffer windowBuf(cx); 1.676 + if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0)) 1.677 + return false; 1.678 + 1.679 + // Unicode and char versions of the window into the offending source 1.680 + // line, without final \n. 1.681 + err.report.uclinebuf = windowBuf.extractWellSized(); 1.682 + if (!err.report.uclinebuf) 1.683 + return false; 1.684 + TwoByteChars tbchars(err.report.uclinebuf, windowLength); 1.685 + err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str(); 1.686 + if (!err.report.linebuf) 1.687 + return false; 1.688 + 1.689 + err.report.tokenptr = err.report.linebuf + windowOffset; 1.690 + err.report.uctokenptr = err.report.uclinebuf + windowOffset; 1.691 + } 1.692 + 1.693 + if (cx->isJSContext()) 1.694 + err.throwError(cx->asJSContext()); 1.695 + 1.696 + return warning; 1.697 +} 1.698 + 1.699 +bool 1.700 +TokenStream::reportStrictModeError(unsigned errorNumber, ...) 1.701 +{ 1.702 + va_list args; 1.703 + va_start(args, errorNumber); 1.704 + bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(), 1.705 + errorNumber, args); 1.706 + va_end(args); 1.707 + return result; 1.708 +} 1.709 + 1.710 +bool 1.711 +TokenStream::reportError(unsigned errorNumber, ...) 1.712 +{ 1.713 + va_list args; 1.714 + va_start(args, errorNumber); 1.715 + bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber, 1.716 + args); 1.717 + va_end(args); 1.718 + return result; 1.719 +} 1.720 + 1.721 +bool 1.722 +TokenStream::reportWarning(unsigned errorNumber, ...) 
1.723 +{ 1.724 + va_list args; 1.725 + va_start(args, errorNumber); 1.726 + bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING, 1.727 + errorNumber, args); 1.728 + va_end(args); 1.729 + return result; 1.730 +} 1.731 + 1.732 +bool 1.733 +TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args) 1.734 +{ 1.735 + if (!options().extraWarningsOption) 1.736 + return true; 1.737 + 1.738 + return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args); 1.739 +} 1.740 + 1.741 +void 1.742 +TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...) 1.743 +{ 1.744 + va_list args; 1.745 + va_start(args, errorNumber); 1.746 + reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, args); 1.747 + va_end(args); 1.748 +} 1.749 + 1.750 +// We have encountered a '\': check for a Unicode escape sequence after it. 1.751 +// Return 'true' and the character code value (by value) if we found a 1.752 +// Unicode escape sequence. Otherwise, return 'false'. In both cases, do not 1.753 +// advance along the buffer. 
1.754 +bool 1.755 +TokenStream::peekUnicodeEscape(int *result) 1.756 +{ 1.757 + jschar cp[5]; 1.758 + 1.759 + if (peekChars(5, cp) && cp[0] == 'u' && 1.760 + JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) && 1.761 + JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) 1.762 + { 1.763 + *result = (((((JS7_UNHEX(cp[1]) << 4) 1.764 + + JS7_UNHEX(cp[2])) << 4) 1.765 + + JS7_UNHEX(cp[3])) << 4) 1.766 + + JS7_UNHEX(cp[4]); 1.767 + return true; 1.768 + } 1.769 + return false; 1.770 +} 1.771 + 1.772 +bool 1.773 +TokenStream::matchUnicodeEscapeIdStart(int32_t *cp) 1.774 +{ 1.775 + if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) { 1.776 + skipChars(5); 1.777 + return true; 1.778 + } 1.779 + return false; 1.780 +} 1.781 + 1.782 +bool 1.783 +TokenStream::matchUnicodeEscapeIdent(int32_t *cp) 1.784 +{ 1.785 + if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) { 1.786 + skipChars(5); 1.787 + return true; 1.788 + } 1.789 + return false; 1.790 +} 1.791 + 1.792 +// Helper function which returns true if the first length(q) characters in p are 1.793 +// the same as the characters in q. 1.794 +static bool 1.795 +CharsMatch(const jschar *p, const char *q) { 1.796 + while (*q) { 1.797 + if (*p++ != *q++) 1.798 + return false; 1.799 + } 1.800 + return true; 1.801 +} 1.802 + 1.803 +bool 1.804 +TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated) 1.805 +{ 1.806 + // Match directive comments used in debugging, such as "//# sourceURL" and 1.807 + // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated. 1.808 + // 1.809 + // To avoid a crashing bug in IE, several JavaScript transpilers wrap single 1.810 + // line comments containing a source mapping URL inside a multiline 1.811 + // comment. To avoid potentially expensive lookahead and backtracking, we 1.812 + // only check for this case if we encounter a '#' character. 
1.813 + 1.814 + if (!getDisplayURL(isMultiline, shouldWarnDeprecated)) 1.815 + return false; 1.816 + if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated)) 1.817 + return false; 1.818 + 1.819 + return true; 1.820 +} 1.821 + 1.822 +bool 1.823 +TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated, 1.824 + const char *directive, int directiveLength, 1.825 + const char *errorMsgPragma, jschar **destination) { 1.826 + JS_ASSERT(directiveLength <= 18); 1.827 + jschar peeked[18]; 1.828 + int32_t c; 1.829 + 1.830 + if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) { 1.831 + if (shouldWarnDeprecated && 1.832 + !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma)) 1.833 + return false; 1.834 + 1.835 + skipChars(directiveLength); 1.836 + tokenbuf.clear(); 1.837 + 1.838 + while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) { 1.839 + getChar(); 1.840 + // Debugging directives can occur in both single- and multi-line 1.841 + // comments. If we're currently inside a multi-line comment, we also 1.842 + // need to recognize multi-line comment terminators. 1.843 + if (isMultiline && c == '*' && peekChar() == '/') { 1.844 + ungetChar('*'); 1.845 + break; 1.846 + } 1.847 + tokenbuf.append(c); 1.848 + } 1.849 + 1.850 + if (tokenbuf.empty()) 1.851 + // The directive's URL was missing, but this is not quite an 1.852 + // exception that we should stop and drop everything for. 
1.853 + return true; 1.854 + 1.855 + size_t length = tokenbuf.length(); 1.856 + 1.857 + js_free(*destination); 1.858 + *destination = cx->pod_malloc<jschar>(length + 1); 1.859 + if (!*destination) 1.860 + return false; 1.861 + 1.862 + PodCopy(*destination, tokenbuf.begin(), length); 1.863 + (*destination)[length] = '\0'; 1.864 + } 1.865 + 1.866 + return true; 1.867 +} 1.868 + 1.869 +bool 1.870 +TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated) 1.871 +{ 1.872 + // Match comments of the form "//# sourceURL=<url>" or 1.873 + // "/\* //# sourceURL=<url> *\/" 1.874 + // 1.875 + // Note that while these are labeled "sourceURL" in the source text, 1.876 + // internally we refer to it as a "displayURL" to distinguish what the 1.877 + // developer would like to refer to the source as from the source's actual 1.878 + // URL. 1.879 + 1.880 + return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11, 1.881 + "sourceURL", &displayURL_); 1.882 +} 1.883 + 1.884 +bool 1.885 +TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated) 1.886 +{ 1.887 + // Match comments of the form "//# sourceMappingURL=<url>" or 1.888 + // "/\* //# sourceMappingURL=<url> *\/" 1.889 + 1.890 + return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18, 1.891 + "sourceMappingURL", &sourceMapURL_); 1.892 +} 1.893 + 1.894 +MOZ_ALWAYS_INLINE Token * 1.895 +TokenStream::newToken(ptrdiff_t adjust) 1.896 +{ 1.897 + cursor = (cursor + 1) & ntokensMask; 1.898 + Token *tp = &tokens[cursor]; 1.899 + tp->pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base(); 1.900 + 1.901 + // NOTE: tp->pos.end is not set until the very end of getTokenInternal(). 
1.902 + MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end)); 1.903 + 1.904 + return tp; 1.905 +} 1.906 + 1.907 +MOZ_ALWAYS_INLINE JSAtom * 1.908 +TokenStream::atomize(ExclusiveContext *cx, CharBuffer &cb) 1.909 +{ 1.910 + return AtomizeChars(cx, cb.begin(), cb.length()); 1.911 +} 1.912 + 1.913 +#ifdef DEBUG 1.914 +static bool 1.915 +IsTokenSane(Token *tp) 1.916 +{ 1.917 + // Nb: TOK_EOL should never be used in an actual Token; it should only be 1.918 + // returned as a TokenKind from peekTokenSameLine(). 1.919 + if (tp->type < TOK_ERROR || tp->type >= TOK_LIMIT || tp->type == TOK_EOL) 1.920 + return false; 1.921 + 1.922 + if (tp->pos.end < tp->pos.begin) 1.923 + return false; 1.924 + 1.925 + return true; 1.926 +} 1.927 +#endif 1.928 + 1.929 +bool 1.930 +TokenStream::putIdentInTokenbuf(const jschar *identStart) 1.931 +{ 1.932 + int32_t c, qc; 1.933 + const jschar *tmp = userbuf.addressOfNextRawChar(); 1.934 + userbuf.setAddressOfNextRawChar(identStart); 1.935 + 1.936 + tokenbuf.clear(); 1.937 + for (;;) { 1.938 + c = getCharIgnoreEOL(); 1.939 + if (!IsIdentifierPart(c)) { 1.940 + if (c != '\\' || !matchUnicodeEscapeIdent(&qc)) 1.941 + break; 1.942 + c = qc; 1.943 + } 1.944 + if (!tokenbuf.append(c)) { 1.945 + userbuf.setAddressOfNextRawChar(tmp); 1.946 + return false; 1.947 + } 1.948 + } 1.949 + userbuf.setAddressOfNextRawChar(tmp); 1.950 + return true; 1.951 +} 1.952 + 1.953 +bool 1.954 +TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp) 1.955 +{ 1.956 + const KeywordInfo *kw = FindKeyword(s, length); 1.957 + if (!kw) 1.958 + return true; 1.959 + 1.960 + if (kw->tokentype == TOK_RESERVED) 1.961 + return reportError(JSMSG_RESERVED_ID, kw->chars); 1.962 + 1.963 + if (kw->tokentype != TOK_STRICT_RESERVED) { 1.964 + if (kw->version <= versionNumber()) { 1.965 + // Working keyword. 
1.966 + if (ttp) { 1.967 + *ttp = kw->tokentype; 1.968 + return true; 1.969 + } 1.970 + return reportError(JSMSG_RESERVED_ID, kw->chars); 1.971 + } 1.972 + 1.973 + // The keyword is not in this version. Treat it as an identifier, unless 1.974 + // it is let which we treat as TOK_STRICT_RESERVED by falling through to 1.975 + // the code below (ES5 forbids it in strict mode). 1.976 + if (kw->tokentype != TOK_LET) 1.977 + return true; 1.978 + } 1.979 + 1.980 + // Strict reserved word. 1.981 + return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars); 1.982 +} 1.983 + 1.984 +enum FirstCharKind { 1.985 + // A jschar has the 'OneChar' kind if it, by itself, constitutes a valid 1.986 + // token that cannot also be a prefix of a longer token. E.g. ';' has the 1.987 + // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens 1.988 + // that begin with '+'. 1.989 + // 1.990 + // The few token kinds satisfying these properties cover roughly 35--45% 1.991 + // of the tokens seen in practice. 1.992 + // 1.993 + // We represent the 'OneChar' kind with any positive value less than 1.994 + // TOK_LIMIT. This representation lets us associate each one-char token 1.995 + // jschar with a TokenKind and thus avoid a subsequent jschar-to-TokenKind 1.996 + // conversion. 1.997 + OneChar_Min = 0, 1.998 + OneChar_Max = TOK_LIMIT - 1, 1.999 + 1.1000 + Space = TOK_LIMIT, 1.1001 + Ident, 1.1002 + Dec, 1.1003 + String, 1.1004 + EOL, 1.1005 + BasePrefix, 1.1006 + Other, 1.1007 + 1.1008 + LastCharKind = Other 1.1009 +}; 1.1010 + 1.1011 +// OneChar: 40, 41, 44, 58, 59, 63, 91, 93, 123, 125, 126: 1.1012 +// '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~' 1.1013 +// Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z' 1.1014 +// Dot: 46: '.' 
1.1015 +// Equals: 61: '=' 1.1016 +// String: 34, 39: '"', '\'' 1.1017 +// Dec: 49..57: '1'..'9' 1.1018 +// Plus: 43: '+' 1.1019 +// BasePrefix: 48: '0' 1.1020 +// Space: 9, 11, 12, 32: '\t', '\v', '\f', ' ' 1.1021 +// EOL: 10, 13: '\n', '\r' 1.1022 +// 1.1023 +#define T_COMMA TOK_COMMA 1.1024 +#define T_COLON TOK_COLON 1.1025 +#define T_BITNOT TOK_BITNOT 1.1026 +#define _______ Other 1.1027 +static const uint8_t firstCharKinds[] = { 1.1028 +/* 0 1 2 3 4 5 6 7 8 9 */ 1.1029 +/* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space, 1.1030 +/* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______, 1.1031 +/* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______, 1.1032 +/* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String, 1.1033 +/* 40+ */ TOK_LP, TOK_RP, _______, _______, T_COMMA,_______, _______, _______,BasePrefix, Dec, 1.1034 +/* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON,TOK_SEMI, 1.1035 +/* 60+ */ _______, _______, _______,TOK_HOOK, _______, Ident, Ident, Ident, Ident, Ident, 1.1036 +/* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, 1.1037 +/* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, 1.1038 +/* 90+ */ Ident, TOK_LB, _______, TOK_RB, _______, Ident, _______, Ident, Ident, Ident, 1.1039 +/* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, 1.1040 +/* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, 1.1041 +/* 120+ */ Ident, Ident, Ident, TOK_LC, _______, TOK_RC,T_BITNOT, _______ 1.1042 +}; 1.1043 +#undef T_COMMA 1.1044 +#undef T_COLON 1.1045 +#undef T_BITNOT 1.1046 +#undef _______ 1.1047 + 1.1048 +static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)), 1.1049 + "Elements of firstCharKinds[] are too small"); 1.1050 + 1.1051 +TokenKind 1.1052 
+TokenStream::getTokenInternal(Modifier modifier) 1.1053 +{ 1.1054 + int c, qc; 1.1055 + Token *tp; 1.1056 + FirstCharKind c1kind; 1.1057 + const jschar *numStart; 1.1058 + bool hasExp; 1.1059 + DecimalPoint decimalPoint; 1.1060 + const jschar *identStart; 1.1061 + bool hadUnicodeEscape; 1.1062 + 1.1063 + retry: 1.1064 + if (MOZ_UNLIKELY(!userbuf.hasRawChars())) { 1.1065 + tp = newToken(0); 1.1066 + tp->type = TOK_EOF; 1.1067 + flags.isEOF = true; 1.1068 + goto out; 1.1069 + } 1.1070 + 1.1071 + c = userbuf.getRawChar(); 1.1072 + JS_ASSERT(c != EOF); 1.1073 + 1.1074 + // Chars not in the range 0..127 are rare. Getting them out of the way 1.1075 + // early allows subsequent checking to be faster. 1.1076 + if (MOZ_UNLIKELY(c >= 128)) { 1.1077 + if (IsSpaceOrBOM2(c)) { 1.1078 + if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) { 1.1079 + updateLineInfoForEOL(); 1.1080 + updateFlagsForEOL(); 1.1081 + } 1.1082 + 1.1083 + goto retry; 1.1084 + } 1.1085 + 1.1086 + tp = newToken(-1); 1.1087 + 1.1088 + // '$' and '_' don't pass IsLetter, but they're < 128 so never appear here. 1.1089 + JS_STATIC_ASSERT('$' < 128 && '_' < 128); 1.1090 + if (IsLetter(c)) { 1.1091 + identStart = userbuf.addressOfNextRawChar() - 1; 1.1092 + hadUnicodeEscape = false; 1.1093 + goto identifier; 1.1094 + } 1.1095 + 1.1096 + goto badchar; 1.1097 + } 1.1098 + 1.1099 + // Get the token kind, based on the first char. The ordering of c1kind 1.1100 + // comparison is based on the frequency of tokens in real code -- Parsemark 1.1101 + // (which represents typical JS code on the web) and the Unreal demo (which 1.1102 + // represents asm.js code). 
1.1103 + // 1.1104 + // Parsemark Unreal 1.1105 + // OneChar 32.9% 39.7% 1.1106 + // Space 25.0% 0.6% 1.1107 + // Ident 19.2% 36.4% 1.1108 + // Dec 7.2% 5.1% 1.1109 + // String 7.9% 0.0% 1.1110 + // EOL 1.7% 0.0% 1.1111 + // BasePrefix 0.4% 4.9% 1.1112 + // Other 5.7% 13.3% 1.1113 + // 1.1114 + // The ordering is based mostly only Parsemark frequencies, with Unreal 1.1115 + // frequencies used to break close categories (e.g. |Dec| and |String|). 1.1116 + // |Other| is biggish, but no other token kind is common enough for it to 1.1117 + // be worth adding extra values to FirstCharKind. 1.1118 + // 1.1119 + c1kind = FirstCharKind(firstCharKinds[c]); 1.1120 + 1.1121 + // Look for an unambiguous single-char token. 1.1122 + // 1.1123 + if (c1kind <= OneChar_Max) { 1.1124 + tp = newToken(-1); 1.1125 + tp->type = TokenKind(c1kind); 1.1126 + goto out; 1.1127 + } 1.1128 + 1.1129 + // Skip over non-EOL whitespace chars. 1.1130 + // 1.1131 + if (c1kind == Space) 1.1132 + goto retry; 1.1133 + 1.1134 + // Look for an identifier. 1.1135 + // 1.1136 + if (c1kind == Ident) { 1.1137 + tp = newToken(-1); 1.1138 + identStart = userbuf.addressOfNextRawChar() - 1; 1.1139 + hadUnicodeEscape = false; 1.1140 + 1.1141 + identifier: 1.1142 + for (;;) { 1.1143 + c = getCharIgnoreEOL(); 1.1144 + if (c == EOF) 1.1145 + break; 1.1146 + if (!IsIdentifierPart(c)) { 1.1147 + if (c != '\\' || !matchUnicodeEscapeIdent(&qc)) 1.1148 + break; 1.1149 + hadUnicodeEscape = true; 1.1150 + } 1.1151 + } 1.1152 + ungetCharIgnoreEOL(c); 1.1153 + 1.1154 + // Identifiers containing no Unicode escapes can be processed directly 1.1155 + // from userbuf. The rest must use the escapes converted via tokenbuf 1.1156 + // before atomizing. 
1.1157 + const jschar *chars; 1.1158 + size_t length; 1.1159 + if (hadUnicodeEscape) { 1.1160 + if (!putIdentInTokenbuf(identStart)) 1.1161 + goto error; 1.1162 + 1.1163 + chars = tokenbuf.begin(); 1.1164 + length = tokenbuf.length(); 1.1165 + } else { 1.1166 + chars = identStart; 1.1167 + length = userbuf.addressOfNextRawChar() - identStart; 1.1168 + } 1.1169 + 1.1170 + // Check for keywords unless the parser told us not to. 1.1171 + if (modifier != KeywordIsName) { 1.1172 + tp->type = TOK_NAME; 1.1173 + if (!checkForKeyword(chars, length, &tp->type)) 1.1174 + goto error; 1.1175 + if (tp->type != TOK_NAME) 1.1176 + goto out; 1.1177 + } 1.1178 + 1.1179 + JSAtom *atom = AtomizeChars(cx, chars, length); 1.1180 + if (!atom) 1.1181 + goto error; 1.1182 + tp->type = TOK_NAME; 1.1183 + tp->setName(atom->asPropertyName()); 1.1184 + goto out; 1.1185 + } 1.1186 + 1.1187 + // Look for a decimal number. 1.1188 + // 1.1189 + if (c1kind == Dec) { 1.1190 + tp = newToken(-1); 1.1191 + numStart = userbuf.addressOfNextRawChar() - 1; 1.1192 + 1.1193 + decimal: 1.1194 + decimalPoint = NoDecimal; 1.1195 + hasExp = false; 1.1196 + while (JS7_ISDEC(c)) 1.1197 + c = getCharIgnoreEOL(); 1.1198 + 1.1199 + if (c == '.') { 1.1200 + decimalPoint = HasDecimal; 1.1201 + decimal_dot: 1.1202 + do { 1.1203 + c = getCharIgnoreEOL(); 1.1204 + } while (JS7_ISDEC(c)); 1.1205 + } 1.1206 + if (c == 'e' || c == 'E') { 1.1207 + hasExp = true; 1.1208 + c = getCharIgnoreEOL(); 1.1209 + if (c == '+' || c == '-') 1.1210 + c = getCharIgnoreEOL(); 1.1211 + if (!JS7_ISDEC(c)) { 1.1212 + ungetCharIgnoreEOL(c); 1.1213 + reportError(JSMSG_MISSING_EXPONENT); 1.1214 + goto error; 1.1215 + } 1.1216 + do { 1.1217 + c = getCharIgnoreEOL(); 1.1218 + } while (JS7_ISDEC(c)); 1.1219 + } 1.1220 + ungetCharIgnoreEOL(c); 1.1221 + 1.1222 + if (c != EOF && IsIdentifierStart(c)) { 1.1223 + reportError(JSMSG_IDSTART_AFTER_NUMBER); 1.1224 + goto error; 1.1225 + } 1.1226 + 1.1227 + // Unlike identifiers and strings, numbers cannot 
contain escaped 1.1228 + // chars, so we don't need to use tokenbuf. Instead we can just 1.1229 + // convert the jschars in userbuf directly to the numeric value. 1.1230 + double dval; 1.1231 + if (!((decimalPoint == HasDecimal) || hasExp)) { 1.1232 + if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval)) 1.1233 + goto error; 1.1234 + } else { 1.1235 + const jschar *dummy; 1.1236 + if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval)) 1.1237 + goto error; 1.1238 + } 1.1239 + tp->type = TOK_NUMBER; 1.1240 + tp->setNumber(dval, decimalPoint); 1.1241 + goto out; 1.1242 + } 1.1243 + 1.1244 + // Look for a string. 1.1245 + // 1.1246 + if (c1kind == String) { 1.1247 + tp = newToken(-1); 1.1248 + qc = c; 1.1249 + tokenbuf.clear(); 1.1250 + while (true) { 1.1251 + // We need to detect any of these chars: " or ', \n (or its 1.1252 + // equivalents), \\, EOF. We use maybeStrSpecial[] in a manner 1.1253 + // similar to maybeEOL[], see above. Because we detect EOL 1.1254 + // sequences here and put them back immediately, we can use 1.1255 + // getCharIgnoreEOL(). 1.1256 + c = getCharIgnoreEOL(); 1.1257 + if (maybeStrSpecial[c & 0xff]) { 1.1258 + if (c == qc) 1.1259 + break; 1.1260 + if (c == '\\') { 1.1261 + switch (c = getChar()) { 1.1262 + case 'b': c = '\b'; break; 1.1263 + case 'f': c = '\f'; break; 1.1264 + case 'n': c = '\n'; break; 1.1265 + case 'r': c = '\r'; break; 1.1266 + case 't': c = '\t'; break; 1.1267 + case 'v': c = '\v'; break; 1.1268 + 1.1269 + default: 1.1270 + if ('0' <= c && c < '8') { 1.1271 + int32_t val = JS7_UNDEC(c); 1.1272 + 1.1273 + c = peekChar(); 1.1274 + // Strict mode code allows only \0, then a non-digit. 
1.1275 + if (val != 0 || JS7_ISDEC(c)) { 1.1276 + if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL)) 1.1277 + goto error; 1.1278 + flags.sawOctalEscape = true; 1.1279 + } 1.1280 + if ('0' <= c && c < '8') { 1.1281 + val = 8 * val + JS7_UNDEC(c); 1.1282 + getChar(); 1.1283 + c = peekChar(); 1.1284 + if ('0' <= c && c < '8') { 1.1285 + int32_t save = val; 1.1286 + val = 8 * val + JS7_UNDEC(c); 1.1287 + if (val <= 0377) 1.1288 + getChar(); 1.1289 + else 1.1290 + val = save; 1.1291 + } 1.1292 + } 1.1293 + 1.1294 + c = jschar(val); 1.1295 + } else if (c == 'u') { 1.1296 + jschar cp[4]; 1.1297 + if (peekChars(4, cp) && 1.1298 + JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) && 1.1299 + JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) { 1.1300 + c = (((((JS7_UNHEX(cp[0]) << 4) 1.1301 + + JS7_UNHEX(cp[1])) << 4) 1.1302 + + JS7_UNHEX(cp[2])) << 4) 1.1303 + + JS7_UNHEX(cp[3]); 1.1304 + skipChars(4); 1.1305 + } else { 1.1306 + reportError(JSMSG_MALFORMED_ESCAPE, "Unicode"); 1.1307 + goto error; 1.1308 + } 1.1309 + } else if (c == 'x') { 1.1310 + jschar cp[2]; 1.1311 + if (peekChars(2, cp) && 1.1312 + JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) { 1.1313 + c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]); 1.1314 + skipChars(2); 1.1315 + } else { 1.1316 + reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal"); 1.1317 + goto error; 1.1318 + } 1.1319 + } else if (c == '\n') { 1.1320 + // ES5 7.8.4: an escaped line terminator represents 1.1321 + // no character. 
1.1322 + continue; 1.1323 + } 1.1324 + break; 1.1325 + } 1.1326 + } else if (TokenBuf::isRawEOLChar(c) || c == EOF) { 1.1327 + ungetCharIgnoreEOL(c); 1.1328 + reportError(JSMSG_UNTERMINATED_STRING); 1.1329 + goto error; 1.1330 + } 1.1331 + } 1.1332 + if (!tokenbuf.append(c)) 1.1333 + goto error; 1.1334 + } 1.1335 + JSAtom *atom = atomize(cx, tokenbuf); 1.1336 + if (!atom) 1.1337 + goto error; 1.1338 + tp->type = TOK_STRING; 1.1339 + tp->setAtom(atom); 1.1340 + goto out; 1.1341 + } 1.1342 + 1.1343 + // Skip over EOL chars, updating line state along the way. 1.1344 + // 1.1345 + if (c1kind == EOL) { 1.1346 + // If it's a \r\n sequence: treat as a single EOL, skip over the \n. 1.1347 + if (c == '\r' && userbuf.hasRawChars()) 1.1348 + userbuf.matchRawChar('\n'); 1.1349 + updateLineInfoForEOL(); 1.1350 + updateFlagsForEOL(); 1.1351 + goto retry; 1.1352 + } 1.1353 + 1.1354 + // Look for a hexadecimal, octal, or binary number. 1.1355 + // 1.1356 + if (c1kind == BasePrefix) { 1.1357 + tp = newToken(-1); 1.1358 + int radix; 1.1359 + c = getCharIgnoreEOL(); 1.1360 + if (c == 'x' || c == 'X') { 1.1361 + radix = 16; 1.1362 + c = getCharIgnoreEOL(); 1.1363 + if (!JS7_ISHEX(c)) { 1.1364 + ungetCharIgnoreEOL(c); 1.1365 + reportError(JSMSG_MISSING_HEXDIGITS); 1.1366 + goto error; 1.1367 + } 1.1368 + numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0x' 1.1369 + while (JS7_ISHEX(c)) 1.1370 + c = getCharIgnoreEOL(); 1.1371 + } else if (c == 'b' || c == 'B') { 1.1372 + radix = 2; 1.1373 + c = getCharIgnoreEOL(); 1.1374 + if (c != '0' && c != '1') { 1.1375 + ungetCharIgnoreEOL(c); 1.1376 + reportError(JSMSG_MISSING_BINARY_DIGITS); 1.1377 + goto error; 1.1378 + } 1.1379 + numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0b' 1.1380 + while (c == '0' || c == '1') 1.1381 + c = getCharIgnoreEOL(); 1.1382 + } else if (c == 'o' || c == 'O') { 1.1383 + radix = 8; 1.1384 + c = getCharIgnoreEOL(); 1.1385 + if (c < '0' || c > '7') { 1.1386 + ungetCharIgnoreEOL(c); 
1.1387 + reportError(JSMSG_MISSING_OCTAL_DIGITS); 1.1388 + goto error; 1.1389 + } 1.1390 + numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0o' 1.1391 + while ('0' <= c && c <= '7') 1.1392 + c = getCharIgnoreEOL(); 1.1393 + } else if (JS7_ISDEC(c)) { 1.1394 + radix = 8; 1.1395 + numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0' 1.1396 + while (JS7_ISDEC(c)) { 1.1397 + // Octal integer literals are not permitted in strict mode code. 1.1398 + if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL)) 1.1399 + goto error; 1.1400 + 1.1401 + // Outside strict mode, we permit 08 and 09 as decimal numbers, 1.1402 + // which makes our behaviour a superset of the ECMA numeric 1.1403 + // grammar. We might not always be so permissive, so we warn 1.1404 + // about it. 1.1405 + if (c >= '8') { 1.1406 + if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) { 1.1407 + goto error; 1.1408 + } 1.1409 + goto decimal; // use the decimal scanner for the rest of the number 1.1410 + } 1.1411 + c = getCharIgnoreEOL(); 1.1412 + } 1.1413 + } else { 1.1414 + // '0' not followed by 'x', 'X' or a digit; scan as a decimal number. 1.1415 + numStart = userbuf.addressOfNextRawChar() - 1; 1.1416 + goto decimal; 1.1417 + } 1.1418 + ungetCharIgnoreEOL(c); 1.1419 + 1.1420 + if (c != EOF && IsIdentifierStart(c)) { 1.1421 + reportError(JSMSG_IDSTART_AFTER_NUMBER); 1.1422 + goto error; 1.1423 + } 1.1424 + 1.1425 + double dval; 1.1426 + const jschar *dummy; 1.1427 + if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval)) 1.1428 + goto error; 1.1429 + tp->type = TOK_NUMBER; 1.1430 + tp->setNumber(dval, NoDecimal); 1.1431 + goto out; 1.1432 + } 1.1433 + 1.1434 + // This handles everything else. 
1.1435 + // 1.1436 + JS_ASSERT(c1kind == Other); 1.1437 + tp = newToken(-1); 1.1438 + switch (c) { 1.1439 + case '.': 1.1440 + c = getCharIgnoreEOL(); 1.1441 + if (JS7_ISDEC(c)) { 1.1442 + numStart = userbuf.addressOfNextRawChar() - 2; 1.1443 + decimalPoint = HasDecimal; 1.1444 + hasExp = false; 1.1445 + goto decimal_dot; 1.1446 + } 1.1447 + if (c == '.') { 1.1448 + if (matchChar('.')) { 1.1449 + tp->type = TOK_TRIPLEDOT; 1.1450 + goto out; 1.1451 + } 1.1452 + } 1.1453 + ungetCharIgnoreEOL(c); 1.1454 + tp->type = TOK_DOT; 1.1455 + goto out; 1.1456 + 1.1457 + case '=': 1.1458 + if (matchChar('=')) 1.1459 + tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ; 1.1460 + else if (matchChar('>')) 1.1461 + tp->type = TOK_ARROW; 1.1462 + else 1.1463 + tp->type = TOK_ASSIGN; 1.1464 + goto out; 1.1465 + 1.1466 + case '+': 1.1467 + if (matchChar('+')) 1.1468 + tp->type = TOK_INC; 1.1469 + else 1.1470 + tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD; 1.1471 + goto out; 1.1472 + 1.1473 + case '\\': 1.1474 + hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc); 1.1475 + if (hadUnicodeEscape) { 1.1476 + identStart = userbuf.addressOfNextRawChar() - 6; 1.1477 + goto identifier; 1.1478 + } 1.1479 + goto badchar; 1.1480 + 1.1481 + case '|': 1.1482 + if (matchChar('|')) 1.1483 + tp->type = TOK_OR; 1.1484 + else 1.1485 + tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR; 1.1486 + goto out; 1.1487 + 1.1488 + case '^': 1.1489 + tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR; 1.1490 + goto out; 1.1491 + 1.1492 + case '&': 1.1493 + if (matchChar('&')) 1.1494 + tp->type = TOK_AND; 1.1495 + else 1.1496 + tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND; 1.1497 + goto out; 1.1498 + 1.1499 + case '!': 1.1500 + if (matchChar('=')) 1.1501 + tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE; 1.1502 + else 1.1503 + tp->type = TOK_NOT; 1.1504 + goto out; 1.1505 + 1.1506 + case '<': 1.1507 + // NB: treat HTML begin-comment as comment-till-end-of-line. 
1.1508 + if (matchChar('!')) { 1.1509 + if (matchChar('-')) { 1.1510 + if (matchChar('-')) 1.1511 + goto skipline; 1.1512 + ungetChar('-'); 1.1513 + } 1.1514 + ungetChar('!'); 1.1515 + } 1.1516 + if (matchChar('<')) { 1.1517 + tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH; 1.1518 + } else { 1.1519 + tp->type = matchChar('=') ? TOK_LE : TOK_LT; 1.1520 + } 1.1521 + goto out; 1.1522 + 1.1523 + case '>': 1.1524 + if (matchChar('>')) { 1.1525 + if (matchChar('>')) 1.1526 + tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH; 1.1527 + else 1.1528 + tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH; 1.1529 + } else { 1.1530 + tp->type = matchChar('=') ? TOK_GE : TOK_GT; 1.1531 + } 1.1532 + goto out; 1.1533 + 1.1534 + case '*': 1.1535 + tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL; 1.1536 + goto out; 1.1537 + 1.1538 + case '/': 1.1539 + // Look for a single-line comment. 1.1540 + if (matchChar('/')) { 1.1541 + c = peekChar(); 1.1542 + if (c == '@' || c == '#') { 1.1543 + bool shouldWarn = getChar() == '@'; 1.1544 + if (!getDirectives(false, shouldWarn)) 1.1545 + goto error; 1.1546 + } 1.1547 + 1.1548 + skipline: 1.1549 + while ((c = getChar()) != EOF && c != '\n') 1.1550 + continue; 1.1551 + ungetChar(c); 1.1552 + cursor = (cursor - 1) & ntokensMask; 1.1553 + goto retry; 1.1554 + } 1.1555 + 1.1556 + // Look for a multi-line comment. 1.1557 + if (matchChar('*')) { 1.1558 + unsigned linenoBefore = lineno; 1.1559 + while ((c = getChar()) != EOF && 1.1560 + !(c == '*' && matchChar('/'))) { 1.1561 + if (c == '@' || c == '#') { 1.1562 + bool shouldWarn = c == '@'; 1.1563 + if (!getDirectives(true, shouldWarn)) 1.1564 + goto error; 1.1565 + } 1.1566 + } 1.1567 + if (c == EOF) { 1.1568 + reportError(JSMSG_UNTERMINATED_COMMENT); 1.1569 + goto error; 1.1570 + } 1.1571 + if (linenoBefore != lineno) 1.1572 + updateFlagsForEOL(); 1.1573 + cursor = (cursor - 1) & ntokensMask; 1.1574 + goto retry; 1.1575 + } 1.1576 + 1.1577 + // Look for a regexp. 
1.1578 + if (modifier == Operand) { 1.1579 + tokenbuf.clear(); 1.1580 + 1.1581 + bool inCharClass = false; 1.1582 + for (;;) { 1.1583 + c = getChar(); 1.1584 + if (c == '\\') { 1.1585 + if (!tokenbuf.append(c)) 1.1586 + goto error; 1.1587 + c = getChar(); 1.1588 + } else if (c == '[') { 1.1589 + inCharClass = true; 1.1590 + } else if (c == ']') { 1.1591 + inCharClass = false; 1.1592 + } else if (c == '/' && !inCharClass) { 1.1593 + // For compat with IE, allow unescaped / in char classes. 1.1594 + break; 1.1595 + } 1.1596 + if (c == '\n' || c == EOF) { 1.1597 + ungetChar(c); 1.1598 + reportError(JSMSG_UNTERMINATED_REGEXP); 1.1599 + goto error; 1.1600 + } 1.1601 + if (!tokenbuf.append(c)) 1.1602 + goto error; 1.1603 + } 1.1604 + 1.1605 + RegExpFlag reflags = NoFlags; 1.1606 + unsigned length = tokenbuf.length() + 1; 1.1607 + while (true) { 1.1608 + c = peekChar(); 1.1609 + if (c == 'g' && !(reflags & GlobalFlag)) 1.1610 + reflags = RegExpFlag(reflags | GlobalFlag); 1.1611 + else if (c == 'i' && !(reflags & IgnoreCaseFlag)) 1.1612 + reflags = RegExpFlag(reflags | IgnoreCaseFlag); 1.1613 + else if (c == 'm' && !(reflags & MultilineFlag)) 1.1614 + reflags = RegExpFlag(reflags | MultilineFlag); 1.1615 + else if (c == 'y' && !(reflags & StickyFlag)) 1.1616 + reflags = RegExpFlag(reflags | StickyFlag); 1.1617 + else 1.1618 + break; 1.1619 + getChar(); 1.1620 + length++; 1.1621 + } 1.1622 + 1.1623 + c = peekChar(); 1.1624 + if (JS7_ISLET(c)) { 1.1625 + char buf[2] = { '\0', '\0' }; 1.1626 + tp->pos.begin += length + 1; 1.1627 + buf[0] = char(c); 1.1628 + reportError(JSMSG_BAD_REGEXP_FLAG, buf); 1.1629 + (void) getChar(); 1.1630 + goto error; 1.1631 + } 1.1632 + tp->type = TOK_REGEXP; 1.1633 + tp->setRegExpFlags(reflags); 1.1634 + goto out; 1.1635 + } 1.1636 + 1.1637 + tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV; 1.1638 + goto out; 1.1639 + 1.1640 + case '%': 1.1641 + tp->type = matchChar('=') ? 
TOK_MODASSIGN : TOK_MOD; 1.1642 + goto out; 1.1643 + 1.1644 + case '-': 1.1645 + if (matchChar('-')) { 1.1646 + if (peekChar() == '>' && !flags.isDirtyLine) 1.1647 + goto skipline; 1.1648 + tp->type = TOK_DEC; 1.1649 + } else { 1.1650 + tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB; 1.1651 + } 1.1652 + goto out; 1.1653 + 1.1654 + badchar: 1.1655 + default: 1.1656 + reportError(JSMSG_ILLEGAL_CHARACTER); 1.1657 + goto error; 1.1658 + } 1.1659 + 1.1660 + MOZ_ASSUME_UNREACHABLE("should have jumped to |out| or |error|"); 1.1661 + 1.1662 + out: 1.1663 + flags.isDirtyLine = true; 1.1664 + tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base(); 1.1665 + JS_ASSERT(IsTokenSane(tp)); 1.1666 + return tp->type; 1.1667 + 1.1668 + error: 1.1669 + flags.isDirtyLine = true; 1.1670 + tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base(); 1.1671 + tp->type = TOK_ERROR; 1.1672 + JS_ASSERT(IsTokenSane(tp)); 1.1673 + onError(); 1.1674 + return TOK_ERROR; 1.1675 +} 1.1676 + 1.1677 +void 1.1678 +TokenStream::onError() 1.1679 +{ 1.1680 + flags.hadError = true; 1.1681 +#ifdef DEBUG 1.1682 + // Poisoning userbuf on error establishes an invariant: once an erroneous 1.1683 + // token has been seen, userbuf will not be consulted again. This is true 1.1684 + // because the parser will either (a) deal with the TOK_ERROR token by 1.1685 + // aborting parsing immediately; or (b) if the TOK_ERROR token doesn't 1.1686 + // match what it expected, it will unget the token, and the next getToken() 1.1687 + // call will immediately return the just-gotten TOK_ERROR token again 1.1688 + // without consulting userbuf, thanks to the lookahead buffer. 
1.1689 + userbuf.poison(); 1.1690 +#endif 1.1691 +} 1.1692 + 1.1693 +JS_FRIEND_API(int) 1.1694 +js_fgets(char *buf, int size, FILE *file) 1.1695 +{ 1.1696 + int n, i, c; 1.1697 + bool crflag; 1.1698 + 1.1699 + n = size - 1; 1.1700 + if (n < 0) 1.1701 + return -1; 1.1702 + 1.1703 + crflag = false; 1.1704 + for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) { 1.1705 + buf[i] = c; 1.1706 + if (c == '\n') { // any \n ends a line 1.1707 + i++; // keep the \n; we know there is room for \0 1.1708 + break; 1.1709 + } 1.1710 + if (crflag) { // \r not followed by \n ends line at the \r 1.1711 + ungetc(c, file); 1.1712 + break; // and overwrite c in buf with \0 1.1713 + } 1.1714 + crflag = (c == '\r'); 1.1715 + } 1.1716 + 1.1717 + buf[i] = '\0'; 1.1718 + return i; 1.1719 +} 1.1720 + 1.1721 +#ifdef DEBUG 1.1722 +const char * 1.1723 +TokenKindToString(TokenKind tt) 1.1724 +{ 1.1725 + switch (tt) { 1.1726 + case TOK_ERROR: return "TOK_ERROR"; 1.1727 + case TOK_EOF: return "TOK_EOF"; 1.1728 + case TOK_EOL: return "TOK_EOL"; 1.1729 + case TOK_SEMI: return "TOK_SEMI"; 1.1730 + case TOK_COMMA: return "TOK_COMMA"; 1.1731 + case TOK_HOOK: return "TOK_HOOK"; 1.1732 + case TOK_COLON: return "TOK_COLON"; 1.1733 + case TOK_OR: return "TOK_OR"; 1.1734 + case TOK_AND: return "TOK_AND"; 1.1735 + case TOK_BITOR: return "TOK_BITOR"; 1.1736 + case TOK_BITXOR: return "TOK_BITXOR"; 1.1737 + case TOK_BITAND: return "TOK_BITAND"; 1.1738 + case TOK_ADD: return "TOK_ADD"; 1.1739 + case TOK_SUB: return "TOK_SUB"; 1.1740 + case TOK_MUL: return "TOK_MUL"; 1.1741 + case TOK_DIV: return "TOK_DIV"; 1.1742 + case TOK_MOD: return "TOK_MOD"; 1.1743 + case TOK_INC: return "TOK_INC"; 1.1744 + case TOK_DEC: return "TOK_DEC"; 1.1745 + case TOK_DOT: return "TOK_DOT"; 1.1746 + case TOK_TRIPLEDOT: return "TOK_TRIPLEDOT"; 1.1747 + case TOK_LB: return "TOK_LB"; 1.1748 + case TOK_RB: return "TOK_RB"; 1.1749 + case TOK_LC: return "TOK_LC"; 1.1750 + case TOK_RC: return "TOK_RC"; 1.1751 + case TOK_LP: return 
"TOK_LP"; 1.1752 + case TOK_RP: return "TOK_RP"; 1.1753 + case TOK_ARROW: return "TOK_ARROW"; 1.1754 + case TOK_NAME: return "TOK_NAME"; 1.1755 + case TOK_NUMBER: return "TOK_NUMBER"; 1.1756 + case TOK_STRING: return "TOK_STRING"; 1.1757 + case TOK_REGEXP: return "TOK_REGEXP"; 1.1758 + case TOK_TRUE: return "TOK_TRUE"; 1.1759 + case TOK_FALSE: return "TOK_FALSE"; 1.1760 + case TOK_NULL: return "TOK_NULL"; 1.1761 + case TOK_THIS: return "TOK_THIS"; 1.1762 + case TOK_FUNCTION: return "TOK_FUNCTION"; 1.1763 + case TOK_IF: return "TOK_IF"; 1.1764 + case TOK_ELSE: return "TOK_ELSE"; 1.1765 + case TOK_SWITCH: return "TOK_SWITCH"; 1.1766 + case TOK_CASE: return "TOK_CASE"; 1.1767 + case TOK_DEFAULT: return "TOK_DEFAULT"; 1.1768 + case TOK_WHILE: return "TOK_WHILE"; 1.1769 + case TOK_DO: return "TOK_DO"; 1.1770 + case TOK_FOR: return "TOK_FOR"; 1.1771 + case TOK_BREAK: return "TOK_BREAK"; 1.1772 + case TOK_CONTINUE: return "TOK_CONTINUE"; 1.1773 + case TOK_IN: return "TOK_IN"; 1.1774 + case TOK_VAR: return "TOK_VAR"; 1.1775 + case TOK_CONST: return "TOK_CONST"; 1.1776 + case TOK_WITH: return "TOK_WITH"; 1.1777 + case TOK_RETURN: return "TOK_RETURN"; 1.1778 + case TOK_NEW: return "TOK_NEW"; 1.1779 + case TOK_DELETE: return "TOK_DELETE"; 1.1780 + case TOK_TRY: return "TOK_TRY"; 1.1781 + case TOK_CATCH: return "TOK_CATCH"; 1.1782 + case TOK_FINALLY: return "TOK_FINALLY"; 1.1783 + case TOK_THROW: return "TOK_THROW"; 1.1784 + case TOK_INSTANCEOF: return "TOK_INSTANCEOF"; 1.1785 + case TOK_DEBUGGER: return "TOK_DEBUGGER"; 1.1786 + case TOK_YIELD: return "TOK_YIELD"; 1.1787 + case TOK_LET: return "TOK_LET"; 1.1788 + case TOK_RESERVED: return "TOK_RESERVED"; 1.1789 + case TOK_STRICT_RESERVED: return "TOK_STRICT_RESERVED"; 1.1790 + case TOK_STRICTEQ: return "TOK_STRICTEQ"; 1.1791 + case TOK_EQ: return "TOK_EQ"; 1.1792 + case TOK_STRICTNE: return "TOK_STRICTNE"; 1.1793 + case TOK_NE: return "TOK_NE"; 1.1794 + case TOK_TYPEOF: return "TOK_TYPEOF"; 1.1795 + case TOK_VOID: return 
"TOK_VOID"; 1.1796 + case TOK_NOT: return "TOK_NOT"; 1.1797 + case TOK_BITNOT: return "TOK_BITNOT"; 1.1798 + case TOK_LT: return "TOK_LT"; 1.1799 + case TOK_LE: return "TOK_LE"; 1.1800 + case TOK_GT: return "TOK_GT"; 1.1801 + case TOK_GE: return "TOK_GE"; 1.1802 + case TOK_LSH: return "TOK_LSH"; 1.1803 + case TOK_RSH: return "TOK_RSH"; 1.1804 + case TOK_URSH: return "TOK_URSH"; 1.1805 + case TOK_ASSIGN: return "TOK_ASSIGN"; 1.1806 + case TOK_ADDASSIGN: return "TOK_ADDASSIGN"; 1.1807 + case TOK_SUBASSIGN: return "TOK_SUBASSIGN"; 1.1808 + case TOK_BITORASSIGN: return "TOK_BITORASSIGN"; 1.1809 + case TOK_BITXORASSIGN: return "TOK_BITXORASSIGN"; 1.1810 + case TOK_BITANDASSIGN: return "TOK_BITANDASSIGN"; 1.1811 + case TOK_LSHASSIGN: return "TOK_LSHASSIGN"; 1.1812 + case TOK_RSHASSIGN: return "TOK_RSHASSIGN"; 1.1813 + case TOK_URSHASSIGN: return "TOK_URSHASSIGN"; 1.1814 + case TOK_MULASSIGN: return "TOK_MULASSIGN"; 1.1815 + case TOK_DIVASSIGN: return "TOK_DIVASSIGN"; 1.1816 + case TOK_MODASSIGN: return "TOK_MODASSIGN"; 1.1817 + case TOK_EXPORT: return "TOK_EXPORT"; 1.1818 + case TOK_IMPORT: return "TOK_IMPORT"; 1.1819 + case TOK_LIMIT: break; 1.1820 + } 1.1821 + 1.1822 + return "<bad TokenKind>"; 1.1823 +} 1.1824 +#endif