js/src/frontend/TokenStream.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

michael@0 1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
michael@0 2 * vim: set ts=8 sts=4 et sw=4 tw=99:
michael@0 3 * This Source Code Form is subject to the terms of the Mozilla Public
michael@0 4 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 6
michael@0 7 // JS lexical scanner.
michael@0 8
michael@0 9 #include "frontend/TokenStream.h"
michael@0 10
michael@0 11 #include "mozilla/PodOperations.h"
michael@0 12
michael@0 13 #include <ctype.h>
michael@0 14 #include <stdarg.h>
michael@0 15 #include <stdio.h>
michael@0 16 #include <string.h>
michael@0 17
michael@0 18 #include "jsatom.h"
michael@0 19 #include "jscntxt.h"
michael@0 20 #include "jsexn.h"
michael@0 21 #include "jsnum.h"
michael@0 22 #include "jsworkers.h"
michael@0 23
michael@0 24 #include "frontend/BytecodeCompiler.h"
michael@0 25 #include "js/CharacterEncoding.h"
michael@0 26 #include "vm/Keywords.h"
michael@0 27 #include "vm/StringBuffer.h"
michael@0 28
michael@0 29 using namespace js;
michael@0 30 using namespace js::frontend;
michael@0 31 using namespace js::unicode;
michael@0 32
michael@0 33 using mozilla::Maybe;
michael@0 34 using mozilla::PodAssign;
michael@0 35 using mozilla::PodCopy;
michael@0 36 using mozilla::PodZero;
michael@0 37
// One row of the keyword table: the keyword's spelling plus the token kind and
// JSVersion value supplied for it by FOR_EACH_JAVASCRIPT_KEYWORD.
// The field order must match the brace initializer emitted by KEYWORD_INFO.
michael@0 38 struct KeywordInfo {
michael@0 39 const char *chars; // C string with keyword text
// Token kind stored for this keyword (the |type| macro argument).
michael@0 40 TokenKind tokentype;
// NOTE(review): presumably the language version that introduced or gates the
// keyword -- confirm against vm/Keywords.h.
michael@0 41 JSVersion version;
michael@0 42 };
michael@0 43
// Table with one KeywordInfo per entry of FOR_EACH_JAVASCRIPT_KEYWORD.
// FindKeyword() returns pointers into this table, indexing it with the values
// handed to JSKW_GOT_MATCH / JSKW_TEST_GUESS by the code in jsautokw.h, so the
// entry order presumably has to agree with that header (assumption -- verify
// against whatever produces jsautokw.h).
michael@0 44 static const KeywordInfo keywords[] = {
// KEYWORD_INFO ignores its |name| argument and builds the text pointer from
// the js_<keyword>_str symbol.
michael@0 45 #define KEYWORD_INFO(keyword, name, type, version) \
michael@0 46 {js_##keyword##_str, type, version},
michael@0 47 FOR_EACH_JAVASCRIPT_KEYWORD(KEYWORD_INFO)
michael@0 48 #undef KEYWORD_INFO
michael@0 49 };
michael@0 50
michael@0 51 // Returns a KeywordInfo for the specified characters, or nullptr if the string
michael@0 52 // is not a keyword.
//
// |s| points at |length| characters and |length| must be non-zero (asserted
// below). A non-null result points into the static |keywords| table.
michael@0 53 static const KeywordInfo *
michael@0 54 FindKeyword(const jschar *s, size_t length)
michael@0 55 {
michael@0 56 JS_ASSERT(length != 0);
michael@0 57
michael@0 58 size_t i;
michael@0 59 const KeywordInfo *kw;
michael@0 60 const char *chars;
michael@0 61
// The matching code itself comes from jsautokw.h, which is written in terms of
// the JSKW_* hooks defined here: JSKW_LENGTH/JSKW_AT read the input, and the
// remaining hooks leave by jumping to one of the three labels below (setting
// |i| to a |keywords| index first for the two "match" hooks).
michael@0 62 #define JSKW_LENGTH() length
michael@0 63 #define JSKW_AT(column) s[column]
michael@0 64 #define JSKW_GOT_MATCH(index) i = (index); goto got_match;
michael@0 65 #define JSKW_TEST_GUESS(index) i = (index); goto test_guess;
michael@0 66 #define JSKW_NO_MATCH() goto no_match;
michael@0 67 #include "jsautokw.h"
michael@0 68 #undef JSKW_NO_MATCH
michael@0 69 #undef JSKW_TEST_GUESS
michael@0 70 #undef JSKW_GOT_MATCH
michael@0 71 #undef JSKW_AT
michael@0 72 #undef JSKW_LENGTH
michael@0 73
// Definite match: entry |i| is returned without any further comparison.
michael@0 74 got_match:
michael@0 75 return &keywords[i];
michael@0 76
// Candidate match: compare all |length| input characters against entry |i|.
// Only |length| characters of the keyword text are examined, so this relies on
// the included code having picked a keyword of that same length (assumption --
// not checkable from this file).
michael@0 77 test_guess:
michael@0 78 kw = &keywords[i];
michael@0 79 chars = kw->chars;
michael@0 80 do {
// The cast keeps the keyword byte non-negative when compared with a jschar.
michael@0 81 if (*s++ != (unsigned char)(*chars++))
michael@0 82 goto no_match;
michael@0 83 } while (--length != 0);
michael@0 84 return kw;
michael@0 85
michael@0 86 no_match:
michael@0 87 return nullptr;
michael@0 88 }
michael@0 89
michael@0 90 bool
michael@0 91 frontend::IsIdentifier(JSLinearString *str)
michael@0 92 {
michael@0 93 const jschar *chars = str->chars();
michael@0 94 size_t length = str->length();
michael@0 95
michael@0 96 if (length == 0)
michael@0 97 return false;
michael@0 98 jschar c = *chars;
michael@0 99 if (!IsIdentifierStart(c))
michael@0 100 return false;
michael@0 101 const jschar *end = chars + length;
michael@0 102 while (++chars != end) {
michael@0 103 c = *chars;
michael@0 104 if (!IsIdentifierPart(c))
michael@0 105 return false;
michael@0 106 }
michael@0 107 return true;
michael@0 108 }
michael@0 109
michael@0 110 bool
michael@0 111 frontend::IsKeyword(JSLinearString *str)
michael@0 112 {
michael@0 113 return FindKeyword(str->chars(), str->length()) != nullptr;
michael@0 114 }
michael@0 115
michael@0 116 TokenStream::SourceCoords::SourceCoords(ExclusiveContext *cx, uint32_t ln)
michael@0 117 : lineStartOffsets_(cx), initialLineNum_(ln), lastLineIndex_(0)
michael@0 118 {
michael@0 119 // This is actually necessary! Removing it causes compile errors on
michael@0 120 // GCC and clang. You could try declaring this:
michael@0 121 //
michael@0 122 // const uint32_t TokenStream::SourceCoords::MAX_PTR;
michael@0 123 //
michael@0 124 // which fixes the GCC/clang error, but causes bustage on Windows. Sigh.
michael@0 125 //
michael@0 126 uint32_t maxPtr = MAX_PTR;
michael@0 127
michael@0 128 // The first line begins at buffer offset 0. MAX_PTR is the sentinel. The
michael@0 129 // appends cannot fail because |lineStartOffsets_| has statically-allocated
michael@0 130 // elements.
michael@0 131 JS_ASSERT(lineStartOffsets_.capacity() >= 2);
michael@0 132 (void)lineStartOffsets_.reserve(2);
michael@0 133 lineStartOffsets_.infallibleAppend(0);
michael@0 134 lineStartOffsets_.infallibleAppend(maxPtr);
michael@0 135 }
michael@0 136
michael@0 137 MOZ_ALWAYS_INLINE void
michael@0 138 TokenStream::SourceCoords::add(uint32_t lineNum, uint32_t lineStartOffset)
michael@0 139 {
michael@0 140 uint32_t lineIndex = lineNumToIndex(lineNum);
michael@0 141 uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
michael@0 142
michael@0 143 JS_ASSERT(lineStartOffsets_[0] == 0 && lineStartOffsets_[sentinelIndex] == MAX_PTR);
michael@0 144
michael@0 145 if (lineIndex == sentinelIndex) {
michael@0 146 // We haven't seen this newline before. Update lineStartOffsets_.
michael@0 147 // We ignore any failures due to OOM -- because we always have a
michael@0 148 // sentinel node, it'll just be like the newline wasn't present. I.e.
michael@0 149 // the line numbers will be wrong, but the code won't crash or anything
michael@0 150 // like that.
michael@0 151 lineStartOffsets_[lineIndex] = lineStartOffset;
michael@0 152
michael@0 153 uint32_t maxPtr = MAX_PTR;
michael@0 154 (void)lineStartOffsets_.append(maxPtr);
michael@0 155
michael@0 156 } else {
michael@0 157 // We have seen this newline before (and ungot it). Do nothing (other
michael@0 158 // than checking it hasn't mysteriously changed).
michael@0 159 JS_ASSERT(lineStartOffsets_[lineIndex] == lineStartOffset);
michael@0 160 }
michael@0 161 }
michael@0 162
michael@0 163 MOZ_ALWAYS_INLINE bool
michael@0 164 TokenStream::SourceCoords::fill(const TokenStream::SourceCoords &other)
michael@0 165 {
michael@0 166 JS_ASSERT(lineStartOffsets_.back() == MAX_PTR);
michael@0 167 JS_ASSERT(other.lineStartOffsets_.back() == MAX_PTR);
michael@0 168
michael@0 169 if (lineStartOffsets_.length() >= other.lineStartOffsets_.length())
michael@0 170 return true;
michael@0 171
michael@0 172 uint32_t sentinelIndex = lineStartOffsets_.length() - 1;
michael@0 173 lineStartOffsets_[sentinelIndex] = other.lineStartOffsets_[sentinelIndex];
michael@0 174
michael@0 175 for (size_t i = sentinelIndex + 1; i < other.lineStartOffsets_.length(); i++) {
michael@0 176 if (!lineStartOffsets_.append(other.lineStartOffsets_[i]))
michael@0 177 return false;
michael@0 178 }
michael@0 179 return true;
michael@0 180 }
michael@0 181
// Returns the index into |lineStartOffsets_| of the line containing |offset|,
// i.e. the index i with lineStartOffsets_[i] <= offset < lineStartOffsets_[i + 1]
// (asserted before returning from the search path).
//
// The result is also stored in |lastLineIndex_| so that the next lookup can
// start from it. Note that this method is const yet assigns |lastLineIndex_|,
// so that member is presumably declared mutable (declaration not visible here).
michael@0 182 MOZ_ALWAYS_INLINE uint32_t
michael@0 183 TokenStream::SourceCoords::lineIndexOf(uint32_t offset) const
michael@0 184 {
michael@0 185 uint32_t iMin, iMax, iMid;
michael@0 186
michael@0 187 if (lineStartOffsets_[lastLineIndex_] <= offset) {
michael@0 188 // If we reach here, offset is on a line the same as or higher than
michael@0 189 // last time. Check first for the +0, +1, +2 cases, because they
michael@0 190 // typically cover 85--98% of cases.
michael@0 191 if (offset < lineStartOffsets_[lastLineIndex_ + 1])
michael@0 192 return lastLineIndex_; // lineIndex is same as last time
michael@0 193
michael@0 194 // If we reach here, there must be at least one more entry (plus the
michael@0 195 // sentinel). Try it.
michael@0 196 lastLineIndex_++;
michael@0 197 if (offset < lineStartOffsets_[lastLineIndex_ + 1])
michael@0 198 return lastLineIndex_; // lineIndex is one higher than last time
michael@0 199
michael@0 200 // The same logic applies here.
michael@0 201 lastLineIndex_++;
michael@0 202 if (offset < lineStartOffsets_[lastLineIndex_ + 1]) {
michael@0 203 return lastLineIndex_; // lineIndex is two higher than last time
michael@0 204 }
michael@0 205
michael@0 206 // No luck. Oh well, we have a better-than-default starting point for
michael@0 207 // the binary search.
michael@0 208 iMin = lastLineIndex_ + 1;
michael@0 209 JS_ASSERT(iMin < lineStartOffsets_.length() - 1); // -1 due to the sentinel
michael@0 210
michael@0 211 } else {
// |offset| lies before the cached line: search the whole table.
michael@0 212 iMin = 0;
michael@0 213 }
michael@0 214
michael@0 215 // This is a binary search with deferred detection of equality, which was
michael@0 216 // marginally faster in this case than a standard binary search.
michael@0 217 // The -2 is because |lineStartOffsets_.length() - 1| is the sentinel, and we
michael@0 218 // want one before that.
michael@0 219 iMax = lineStartOffsets_.length() - 2;
michael@0 220 while (iMax > iMin) {
// Midpoint written as iMin + half the distance rather than (iMin + iMax) / 2.
michael@0 221 iMid = iMin + (iMax - iMin) / 2;
michael@0 222 if (offset >= lineStartOffsets_[iMid + 1])
michael@0 223 iMin = iMid + 1; // offset is above lineStartOffsets_[iMid]
michael@0 224 else
michael@0 225 iMax = iMid; // offset is below or within lineStartOffsets_[iMid]
michael@0 226 }
michael@0 227 JS_ASSERT(iMax == iMin);
michael@0 228 JS_ASSERT(lineStartOffsets_[iMin] <= offset && offset < lineStartOffsets_[iMin + 1]);
// Remember the answer for the next lookup's fast path.
michael@0 229 lastLineIndex_ = iMin;
michael@0 230 return iMin;
michael@0 231 }
michael@0 232
michael@0 233 uint32_t
michael@0 234 TokenStream::SourceCoords::lineNum(uint32_t offset) const
michael@0 235 {
michael@0 236 uint32_t lineIndex = lineIndexOf(offset);
michael@0 237 return lineIndexToNum(lineIndex);
michael@0 238 }
michael@0 239
michael@0 240 uint32_t
michael@0 241 TokenStream::SourceCoords::columnIndex(uint32_t offset) const
michael@0 242 {
michael@0 243 uint32_t lineIndex = lineIndexOf(offset);
michael@0 244 uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
michael@0 245 JS_ASSERT(offset >= lineStartOffset);
michael@0 246 return offset - lineStartOffset;
michael@0 247 }
michael@0 248
michael@0 249 void
michael@0 250 TokenStream::SourceCoords::lineNumAndColumnIndex(uint32_t offset, uint32_t *lineNum,
michael@0 251 uint32_t *columnIndex) const
michael@0 252 {
michael@0 253 uint32_t lineIndex = lineIndexOf(offset);
michael@0 254 *lineNum = lineIndexToNum(lineIndex);
michael@0 255 uint32_t lineStartOffset = lineStartOffsets_[lineIndex];
michael@0 256 JS_ASSERT(offset >= lineStartOffset);
michael@0 257 *columnIndex = offset - lineStartOffset;
michael@0 258 }
michael@0 259
michael@0 260 #ifdef _MSC_VER
michael@0 261 #pragma warning(push)
michael@0 262 #pragma warning(disable:4351)
michael@0 263 #endif
michael@0 264
michael@0 265 // Initialize members that aren't initialized in |init|.
//
// |base| / |length| describe the characters to tokenize. The buffer handed to
// |userbuf| and the initial |linebase| both start |options.column| characters
// before |base|; the next character to read is then moved back to |base| in
// the constructor body. |smg| is stored in |strictModeGetter|.
// The constructor body also fills the lookup tables maybeEOL[],
// maybeStrSpecial[] and isExprEnding[].
michael@0 266 TokenStream::TokenStream(ExclusiveContext *cx, const ReadOnlyCompileOptions &options,
michael@0 267 const jschar *base, size_t length, StrictModeGetter *smg)
michael@0 268 : srcCoords(cx, options.lineno),
michael@0 269 options_(options),
michael@0 270 tokens(),
michael@0 271 cursor(),
michael@0 272 lookahead(),
michael@0 273 lineno(options.lineno),
michael@0 274 flags(),
michael@0 275 linebase(base - options.column),
michael@0 276 prevLinebase(nullptr),
michael@0 277 userbuf(cx, base - options.column, length + options.column), // See comment below
michael@0 278 filename(options.filename()),
michael@0 279 displayURL_(nullptr),
michael@0 280 sourceMapURL_(nullptr),
michael@0 281 tokenbuf(cx),
michael@0 282 cx(cx),
michael@0 283 originPrincipals(options.originPrincipals(cx)),
michael@0 284 strictModeGetter(smg)
michael@0 285 {
michael@0 286 // The caller must ensure that a reference is held on the supplied principals
michael@0 287 // throughout compilation.
michael@0 288 JS_ASSERT_IF(originPrincipals, originPrincipals->refcount > 0);
michael@0 289
michael@0 290 // Column numbers are computed as offsets from the current line's base, so the
michael@0 291 // initial line's base must be included in the buffer. linebase and userbuf
michael@0 292 // were adjusted above, and if we are starting tokenization part way through
michael@0 293 // this line then adjust the next character.
michael@0 294 userbuf.setAddressOfNextRawChar(base);
michael@0 295
michael@0 296 // Nb: the following tables could be static, but initializing them here is
michael@0 297 // much easier. Don't worry, the time to initialize them for each
michael@0 298 // TokenStream is trivial. See bug 639420.
michael@0 299
michael@0 300 // See getChar() for an explanation of maybeEOL[].
// The table is indexed by the low 8 bits of a character, hence the "& 0xff"
// on the two separators that do not fit in a byte.
michael@0 301 memset(maybeEOL, 0, sizeof(maybeEOL));
michael@0 302 maybeEOL[unsigned('\n')] = true;
michael@0 303 maybeEOL[unsigned('\r')] = true;
michael@0 304 maybeEOL[unsigned(LINE_SEPARATOR & 0xff)] = true;
michael@0 305 maybeEOL[unsigned(PARA_SEPARATOR & 0xff)] = true;
michael@0 306
michael@0 307 // See getTokenInternal() for an explanation of maybeStrSpecial[].
// The masking used above is applied here as well, including to EOF.
michael@0 308 memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
michael@0 309 maybeStrSpecial[unsigned('"')] = true;
michael@0 310 maybeStrSpecial[unsigned('\'')] = true;
michael@0 311 maybeStrSpecial[unsigned('\\')] = true;
michael@0 312 maybeStrSpecial[unsigned('\n')] = true;
michael@0 313 maybeStrSpecial[unsigned('\r')] = true;
michael@0 314 maybeStrSpecial[unsigned(LINE_SEPARATOR & 0xff)] = true;
michael@0 315 maybeStrSpecial[unsigned(PARA_SEPARATOR & 0xff)] = true;
michael@0 316 maybeStrSpecial[unsigned(EOF & 0xff)] = true;
michael@0 317
michael@0 318 // See Parser::assignExpr() for an explanation of isExprEnding[].
// This table is indexed by token kind rather than by character.
michael@0 319 memset(isExprEnding, 0, sizeof(isExprEnding));
michael@0 320 isExprEnding[TOK_COMMA] = 1;
michael@0 321 isExprEnding[TOK_SEMI] = 1;
michael@0 322 isExprEnding[TOK_COLON] = 1;
michael@0 323 isExprEnding[TOK_RP] = 1;
michael@0 324 isExprEnding[TOK_RB] = 1;
michael@0 325 isExprEnding[TOK_RC] = 1;
michael@0 326 }
michael@0 327
michael@0 328 #ifdef _MSC_VER
michael@0 329 #pragma warning(pop)
michael@0 330 #endif
michael@0 331
michael@0 332 TokenStream::~TokenStream()
michael@0 333 {
michael@0 334 js_free(displayURL_);
michael@0 335 js_free(sourceMapURL_);
michael@0 336
michael@0 337 JS_ASSERT_IF(originPrincipals, originPrincipals->refcount);
michael@0 338 }
michael@0 339
michael@0 340 // Use the fastest available getc.
// Preference order: getc_unlocked, then _getc_nolock, then plain getc. The
// HAVE_* symbols are not defined in this file and presumably come from the
// build configuration (assumption). NOTE(review): no use of fast_getc appears
// in the portion of the file reviewed here.
michael@0 341 #if defined(HAVE_GETC_UNLOCKED)
michael@0 342 # define fast_getc getc_unlocked
michael@0 343 #elif defined(HAVE__GETC_NOLOCK)
michael@0 344 # define fast_getc _getc_nolock
michael@0 345 #else
michael@0 346 # define fast_getc getc
michael@0 347 #endif
michael@0 348
michael@0 349 MOZ_ALWAYS_INLINE void
michael@0 350 TokenStream::updateLineInfoForEOL()
michael@0 351 {
michael@0 352 prevLinebase = linebase;
michael@0 353 linebase = userbuf.addressOfNextRawChar();
michael@0 354 lineno++;
michael@0 355 srcCoords.add(lineno, linebase - userbuf.base());
michael@0 356 }
michael@0 357
michael@0 358 MOZ_ALWAYS_INLINE void
michael@0 359 TokenStream::updateFlagsForEOL()
michael@0 360 {
michael@0 361 flags.isDirtyLine = false;
michael@0 362 }
michael@0 363
michael@0 364 // This gets the next char, normalizing all EOL sequences to '\n' as it goes.
michael@0 365 int32_t
michael@0 366 TokenStream::getChar()
michael@0 367 {
michael@0 368 int32_t c;
michael@0 369 if (MOZ_LIKELY(userbuf.hasRawChars())) {
michael@0 370 c = userbuf.getRawChar();
michael@0 371
michael@0 372 // Normalize the jschar if it was a newline. We need to detect any of
michael@0 373 // these four characters: '\n' (0x000a), '\r' (0x000d),
michael@0 374 // LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029). Testing for each
michael@0 375 // one in turn is slow, so we use a single probabilistic check, and if
michael@0 376 // that succeeds, test for them individually.
michael@0 377 //
michael@0 378 // We use the bottom 8 bits to index into a lookup table, succeeding
michael@0 379 // when d&0xff is 0xa, 0xd, 0x28 or 0x29. Among ASCII chars (which
michael@0 380 // are by the far the most common) this gives false positives for '('
michael@0 381 // (0x0028) and ')' (0x0029). We could avoid those by incorporating
michael@0 382 // the 13th bit of d into the lookup, but that requires extra shifting
michael@0 383 // and masking and isn't worthwhile. See TokenStream::TokenStream()
michael@0 384 // for the initialization of the relevant entries in the table.
michael@0 385 if (MOZ_UNLIKELY(maybeEOL[c & 0xff])) {
michael@0 386 if (c == '\n')
michael@0 387 goto eol;
michael@0 388 if (c == '\r') {
michael@0 389 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
michael@0 390 if (userbuf.hasRawChars())
michael@0 391 userbuf.matchRawChar('\n');
michael@0 392 goto eol;
michael@0 393 }
michael@0 394 if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
michael@0 395 goto eol;
michael@0 396 }
michael@0 397 return c;
michael@0 398 }
michael@0 399
michael@0 400 flags.isEOF = true;
michael@0 401 return EOF;
michael@0 402
michael@0 403 eol:
michael@0 404 updateLineInfoForEOL();
michael@0 405 return '\n';
michael@0 406 }
michael@0 407
michael@0 408 // This gets the next char. It does nothing special with EOL sequences, not
michael@0 409 // even updating the line counters. It can be used safely if (a) the
michael@0 410 // resulting char is guaranteed to be ungotten (by ungetCharIgnoreEOL()) if
michael@0 411 // it's an EOL, and (b) the line-related state (lineno, linebase) is not used
michael@0 412 // before it's ungotten.
michael@0 413 int32_t
michael@0 414 TokenStream::getCharIgnoreEOL()
michael@0 415 {
michael@0 416 if (MOZ_LIKELY(userbuf.hasRawChars()))
michael@0 417 return userbuf.getRawChar();
michael@0 418
michael@0 419 flags.isEOF = true;
michael@0 420 return EOF;
michael@0 421 }
michael@0 422
michael@0 423 void
michael@0 424 TokenStream::ungetChar(int32_t c)
michael@0 425 {
michael@0 426 if (c == EOF)
michael@0 427 return;
michael@0 428 JS_ASSERT(!userbuf.atStart());
michael@0 429 userbuf.ungetRawChar();
michael@0 430 if (c == '\n') {
michael@0 431 #ifdef DEBUG
michael@0 432 int32_t c2 = userbuf.peekRawChar();
michael@0 433 JS_ASSERT(TokenBuf::isRawEOLChar(c2));
michael@0 434 #endif
michael@0 435
michael@0 436 // If it's a \r\n sequence, also unget the \r.
michael@0 437 if (!userbuf.atStart())
michael@0 438 userbuf.matchRawCharBackwards('\r');
michael@0 439
michael@0 440 JS_ASSERT(prevLinebase); // we should never get more than one EOL char
michael@0 441 linebase = prevLinebase;
michael@0 442 prevLinebase = nullptr;
michael@0 443 lineno--;
michael@0 444 } else {
michael@0 445 JS_ASSERT(userbuf.peekRawChar() == c);
michael@0 446 }
michael@0 447 }
michael@0 448
michael@0 449 void
michael@0 450 TokenStream::ungetCharIgnoreEOL(int32_t c)
michael@0 451 {
michael@0 452 if (c == EOF)
michael@0 453 return;
michael@0 454 JS_ASSERT(!userbuf.atStart());
michael@0 455 userbuf.ungetRawChar();
michael@0 456 }
michael@0 457
michael@0 458 // Return true iff |n| raw characters can be read from this without reading past
michael@0 459 // EOF or a newline, and copy those characters into |cp| if so. The characters
michael@0 460 // are not consumed: use skipChars(n) to do so after checking that the consumed
michael@0 461 // characters had appropriate values.
michael@0 462 bool
michael@0 463 TokenStream::peekChars(int n, jschar *cp)
michael@0 464 {
michael@0 465 int i, j;
michael@0 466 int32_t c;
michael@0 467
michael@0 468 for (i = 0; i < n; i++) {
michael@0 469 c = getCharIgnoreEOL();
michael@0 470 if (c == EOF)
michael@0 471 break;
michael@0 472 if (c == '\n') {
michael@0 473 ungetCharIgnoreEOL(c);
michael@0 474 break;
michael@0 475 }
michael@0 476 cp[i] = jschar(c);
michael@0 477 }
michael@0 478 for (j = i - 1; j >= 0; j--)
michael@0 479 ungetCharIgnoreEOL(cp[j]);
michael@0 480 return i == n;
michael@0 481 }
michael@0 482
michael@0 483 const jschar *
michael@0 484 TokenStream::TokenBuf::findEOLMax(const jschar *p, size_t max)
michael@0 485 {
michael@0 486 JS_ASSERT(base_ <= p && p <= limit_);
michael@0 487
michael@0 488 size_t n = 0;
michael@0 489 while (true) {
michael@0 490 if (p >= limit_)
michael@0 491 break;
michael@0 492 if (n >= max)
michael@0 493 break;
michael@0 494 if (TokenBuf::isRawEOLChar(*p++))
michael@0 495 break;
michael@0 496 n++;
michael@0 497 }
michael@0 498 return p;
michael@0 499 }
michael@0 500
michael@0 501 void
michael@0 502 TokenStream::advance(size_t position)
michael@0 503 {
michael@0 504 const jschar *end = userbuf.base() + position;
michael@0 505 while (userbuf.addressOfNextRawChar() < end)
michael@0 506 getChar();
michael@0 507
michael@0 508 Token *cur = &tokens[cursor];
michael@0 509 cur->pos.begin = userbuf.addressOfNextRawChar() - userbuf.base();
michael@0 510 cur->type = TOK_ERROR;
michael@0 511 lookahead = 0;
michael@0 512 }
michael@0 513
michael@0 514 void
michael@0 515 TokenStream::tell(Position *pos)
michael@0 516 {
michael@0 517 pos->buf = userbuf.addressOfNextRawChar(/* allowPoisoned = */ true);
michael@0 518 pos->flags = flags;
michael@0 519 pos->lineno = lineno;
michael@0 520 pos->linebase = linebase;
michael@0 521 pos->prevLinebase = prevLinebase;
michael@0 522 pos->lookahead = lookahead;
michael@0 523 pos->currentToken = currentToken();
michael@0 524 for (unsigned i = 0; i < lookahead; i++)
michael@0 525 pos->lookaheadTokens[i] = tokens[(cursor + 1 + i) & ntokensMask];
michael@0 526 }
michael@0 527
michael@0 528 void
michael@0 529 TokenStream::seek(const Position &pos)
michael@0 530 {
michael@0 531 userbuf.setAddressOfNextRawChar(pos.buf, /* allowPoisoned = */ true);
michael@0 532 flags = pos.flags;
michael@0 533 lineno = pos.lineno;
michael@0 534 linebase = pos.linebase;
michael@0 535 prevLinebase = pos.prevLinebase;
michael@0 536 lookahead = pos.lookahead;
michael@0 537
michael@0 538 tokens[cursor] = pos.currentToken;
michael@0 539 for (unsigned i = 0; i < lookahead; i++)
michael@0 540 tokens[(cursor + 1 + i) & ntokensMask] = pos.lookaheadTokens[i];
michael@0 541 }
michael@0 542
michael@0 543 bool
michael@0 544 TokenStream::seek(const Position &pos, const TokenStream &other)
michael@0 545 {
michael@0 546 if (!srcCoords.fill(other.srcCoords))
michael@0 547 return false;
michael@0 548 seek(pos);
michael@0 549 return true;
michael@0 550 }
michael@0 551
michael@0 552 bool
michael@0 553 TokenStream::reportStrictModeErrorNumberVA(uint32_t offset, bool strictMode, unsigned errorNumber,
michael@0 554 va_list args)
michael@0 555 {
michael@0 556 // In strict mode code, this is an error, not merely a warning.
michael@0 557 unsigned flags = JSREPORT_STRICT;
michael@0 558 if (strictMode)
michael@0 559 flags |= JSREPORT_ERROR;
michael@0 560 else if (options().extraWarningsOption)
michael@0 561 flags |= JSREPORT_WARNING;
michael@0 562 else
michael@0 563 return true;
michael@0 564
michael@0 565 return reportCompileErrorNumberVA(offset, flags, errorNumber, args);
michael@0 566 }
michael@0 567
michael@0 568 void
michael@0 569 CompileError::throwError(JSContext *cx)
michael@0 570 {
michael@0 571 // If there's a runtime exception type associated with this error
michael@0 572 // number, set that as the pending exception. For errors occuring at
michael@0 573 // compile time, this is very likely to be a JSEXN_SYNTAXERR.
michael@0 574 //
michael@0 575 // If an exception is thrown but not caught, the JSREPORT_EXCEPTION
michael@0 576 // flag will be set in report.flags. Proper behavior for an error
michael@0 577 // reporter is to ignore a report with this flag for all but top-level
michael@0 578 // compilation errors. The exception will remain pending, and so long
michael@0 579 // as the non-top-level "load", "eval", or "compile" native function
michael@0 580 // returns false, the top-level reporter will eventually receive the
michael@0 581 // uncaught exception report.
michael@0 582 if (!js_ErrorToException(cx, message, &report, nullptr, nullptr))
michael@0 583 CallErrorReporter(cx, message, &report);
michael@0 584 }
michael@0 585
michael@0 586 CompileError::~CompileError()
michael@0 587 {
michael@0 588 js_free((void*)report.uclinebuf);
michael@0 589 js_free((void*)report.linebuf);
michael@0 590 js_free((void*)report.ucmessage);
michael@0 591 js_free(message);
michael@0 592 message = nullptr;
michael@0 593
michael@0 594 if (report.messageArgs) {
michael@0 595 if (argumentsType == ArgumentsAreASCII) {
michael@0 596 unsigned i = 0;
michael@0 597 while (report.messageArgs[i])
michael@0 598 js_free((void*)report.messageArgs[i++]);
michael@0 599 }
michael@0 600 js_free(report.messageArgs);
michael@0 601 }
michael@0 602
michael@0 603 PodZero(&report);
michael@0 604 }
michael@0 605
// Builds a CompileError for |errorNumber| located at buffer offset |offset|
// (or with no location when |offset| is NoOffset). |flags| holds JSREPORT_*
// bits; |args| supplies the arguments for the error message.
//
// When |cx| is a JSContext the error is thrown/reported before returning;
// otherwise it is left in the entry obtained from cx->addPendingCompileError().
//
// Returns true only when the diagnostic was handled as a warning. It returns
// false for errors, for warnings promoted by the werror option, and when
// building the report fails part way.
michael@0 606 bool
michael@0 607 TokenStream::reportCompileErrorNumberVA(uint32_t offset, unsigned flags, unsigned errorNumber,
michael@0 608 va_list args)
michael@0 609 {
michael@0 610 bool warning = JSREPORT_IS_WARNING(flags);
michael@0 611
// With the werror option set, a warning is reported as an error instead.
michael@0 612 if (warning && options().werrorOption) {
michael@0 613 flags &= ~JSREPORT_WARNING;
michael@0 614 warning = false;
michael@0 615 }
michael@0 616
michael@0 617 // On the main thread, report the error immediately. When compiling off
michael@0 618 // thread, save the error so that the main thread can report it later.
michael@0 619 CompileError tempErr;
michael@0 620 CompileError &err = cx->isJSContext() ? tempErr : cx->addPendingCompileError();
michael@0 621
michael@0 622 err.report.flags = flags;
michael@0 623 err.report.errorNumber = errorNumber;
michael@0 624 err.report.filename = filename;
michael@0 625 err.report.originPrincipals = originPrincipals;
michael@0 626 if (offset == NoOffset) {
michael@0 627 err.report.lineno = 0;
michael@0 628 err.report.column = 0;
michael@0 629 } else {
michael@0 630 err.report.lineno = srcCoords.lineNum(offset);
michael@0 631 err.report.column = srcCoords.columnIndex(offset);
michael@0 632 }
michael@0 633
// JSREPORT_UC in |flags| selects Unicode message arguments; otherwise ASCII.
michael@0 634 err.argumentsType = (flags & JSREPORT_UC) ? ArgumentsAreUnicode : ArgumentsAreASCII;
michael@0 635
michael@0 636 if (!js_ExpandErrorArguments(cx, js_GetErrorMessage, nullptr, errorNumber, &err.message,
michael@0 637 &err.report, err.argumentsType, args))
michael@0 638 {
michael@0 639 return false;
michael@0 640 }
michael@0 641
michael@0 642 // Given a token, T, that we want to complain about: if T's (starting)
michael@0 643 // lineno doesn't match TokenStream's lineno, that means we've scanned past
michael@0 644 // the line that T starts on, which makes it hard to print some or all of
michael@0 645 // T's (starting) line for context.
michael@0 646 //
michael@0 647 // So we don't even try, leaving report.linebuf and friends zeroed. This
michael@0 648 // means that any error involving a multi-line token (e.g. an unterminated
michael@0 649 // multi-line string literal) won't have a context printed.
michael@0 650 if (offset != NoOffset && err.report.lineno == lineno) {
michael@0 651 const jschar *tokenStart = userbuf.base() + offset;
michael@0 652
michael@0 653 // We show only a portion (a "window") of the line around the erroneous
michael@0 654 // token -- the first char in the token, plus |windowRadius| chars
michael@0 655 // before it and |windowRadius - 1| chars after it. This is because
michael@0 656 // lines can be very long and printing the whole line is (a) not that
michael@0 657 // helpful, and (b) can waste a lot of memory. See bug 634444.
michael@0 658 static const size_t windowRadius = 60;
michael@0 659
michael@0 660 // Truncate at the front if necessary.
michael@0 661 const jschar *windowBase = (linebase + windowRadius < tokenStart)
michael@0 662 ? tokenStart - windowRadius
michael@0 663 : linebase;
// Position of the token within the window; used below for the token pointers.
michael@0 664 uint32_t windowOffset = tokenStart - windowBase;
michael@0 665
michael@0 666 // Find EOL, or truncate at the back if necessary.
michael@0 667 const jschar *windowLimit = userbuf.findEOLMax(tokenStart, windowRadius);
michael@0 668 size_t windowLength = windowLimit - windowBase;
michael@0 669 JS_ASSERT(windowLength <= windowRadius * 2);
michael@0 670
michael@0 671 // Create the windowed strings.
michael@0 672 StringBuffer windowBuf(cx);
michael@0 673 if (!windowBuf.append(windowBase, windowLength) || !windowBuf.append((jschar)0))
michael@0 674 return false;
michael@0 675
michael@0 676 // Unicode and char versions of the window into the offending source
michael@0 677 // line, without final \n.
// Both buffers are stored in err.report; ~CompileError() frees them.
michael@0 678 err.report.uclinebuf = windowBuf.extractWellSized();
michael@0 679 if (!err.report.uclinebuf)
michael@0 680 return false;
michael@0 681 TwoByteChars tbchars(err.report.uclinebuf, windowLength);
michael@0 682 err.report.linebuf = LossyTwoByteCharsToNewLatin1CharsZ(cx, tbchars).c_str();
michael@0 683 if (!err.report.linebuf)
michael@0 684 return false;
michael@0 685
michael@0 686 err.report.tokenptr = err.report.linebuf + windowOffset;
michael@0 687 err.report.uctokenptr = err.report.uclinebuf + windowOffset;
michael@0 688 }
michael@0 689
michael@0 690 if (cx->isJSContext())
michael@0 691 err.throwError(cx->asJSContext());
michael@0 692
michael@0 693 return warning;
michael@0 694 }
michael@0 695
michael@0 696 bool
michael@0 697 TokenStream::reportStrictModeError(unsigned errorNumber, ...)
michael@0 698 {
michael@0 699 va_list args;
michael@0 700 va_start(args, errorNumber);
michael@0 701 bool result = reportStrictModeErrorNumberVA(currentToken().pos.begin, strictMode(),
michael@0 702 errorNumber, args);
michael@0 703 va_end(args);
michael@0 704 return result;
michael@0 705 }
michael@0 706
michael@0 707 bool
michael@0 708 TokenStream::reportError(unsigned errorNumber, ...)
michael@0 709 {
michael@0 710 va_list args;
michael@0 711 va_start(args, errorNumber);
michael@0 712 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_ERROR, errorNumber,
michael@0 713 args);
michael@0 714 va_end(args);
michael@0 715 return result;
michael@0 716 }
michael@0 717
michael@0 718 bool
michael@0 719 TokenStream::reportWarning(unsigned errorNumber, ...)
michael@0 720 {
michael@0 721 va_list args;
michael@0 722 va_start(args, errorNumber);
michael@0 723 bool result = reportCompileErrorNumberVA(currentToken().pos.begin, JSREPORT_WARNING,
michael@0 724 errorNumber, args);
michael@0 725 va_end(args);
michael@0 726 return result;
michael@0 727 }
michael@0 728
michael@0 729 bool
michael@0 730 TokenStream::reportStrictWarningErrorNumberVA(uint32_t offset, unsigned errorNumber, va_list args)
michael@0 731 {
michael@0 732 if (!options().extraWarningsOption)
michael@0 733 return true;
michael@0 734
michael@0 735 return reportCompileErrorNumberVA(offset, JSREPORT_STRICT|JSREPORT_WARNING, errorNumber, args);
michael@0 736 }
michael@0 737
michael@0 738 void
michael@0 739 TokenStream::reportAsmJSError(uint32_t offset, unsigned errorNumber, ...)
michael@0 740 {
michael@0 741 va_list args;
michael@0 742 va_start(args, errorNumber);
michael@0 743 reportCompileErrorNumberVA(offset, JSREPORT_WARNING, errorNumber, args);
michael@0 744 va_end(args);
michael@0 745 }
michael@0 746
michael@0 747 // We have encountered a '\': check for a Unicode escape sequence after it.
michael@0 748 // Return 'true' and the character code value (by value) if we found a
michael@0 749 // Unicode escape sequence. Otherwise, return 'false'. In both cases, do not
michael@0 750 // advance along the buffer.
michael@0 751 bool
michael@0 752 TokenStream::peekUnicodeEscape(int *result)
michael@0 753 {
michael@0 754 jschar cp[5];
michael@0 755
michael@0 756 if (peekChars(5, cp) && cp[0] == 'u' &&
michael@0 757 JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
michael@0 758 JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
michael@0 759 {
michael@0 760 *result = (((((JS7_UNHEX(cp[1]) << 4)
michael@0 761 + JS7_UNHEX(cp[2])) << 4)
michael@0 762 + JS7_UNHEX(cp[3])) << 4)
michael@0 763 + JS7_UNHEX(cp[4]);
michael@0 764 return true;
michael@0 765 }
michael@0 766 return false;
michael@0 767 }
michael@0 768
michael@0 769 bool
michael@0 770 TokenStream::matchUnicodeEscapeIdStart(int32_t *cp)
michael@0 771 {
michael@0 772 if (peekUnicodeEscape(cp) && IsIdentifierStart(*cp)) {
michael@0 773 skipChars(5);
michael@0 774 return true;
michael@0 775 }
michael@0 776 return false;
michael@0 777 }
michael@0 778
michael@0 779 bool
michael@0 780 TokenStream::matchUnicodeEscapeIdent(int32_t *cp)
michael@0 781 {
michael@0 782 if (peekUnicodeEscape(cp) && IsIdentifierPart(*cp)) {
michael@0 783 skipChars(5);
michael@0 784 return true;
michael@0 785 }
michael@0 786 return false;
michael@0 787 }
michael@0 788
michael@0 789 // Helper function which returns true if the first length(q) characters in p are
michael@0 790 // the same as the characters in q.
michael@0 791 static bool
michael@0 792 CharsMatch(const jschar *p, const char *q) {
michael@0 793 while (*q) {
michael@0 794 if (*p++ != *q++)
michael@0 795 return false;
michael@0 796 }
michael@0 797 return true;
michael@0 798 }
michael@0 799
michael@0 800 bool
michael@0 801 TokenStream::getDirectives(bool isMultiline, bool shouldWarnDeprecated)
michael@0 802 {
michael@0 803 // Match directive comments used in debugging, such as "//# sourceURL" and
michael@0 804 // "//# sourceMappingURL". Use of "//@" instead of "//#" is deprecated.
michael@0 805 //
michael@0 806 // To avoid a crashing bug in IE, several JavaScript transpilers wrap single
michael@0 807 // line comments containing a source mapping URL inside a multiline
michael@0 808 // comment. To avoid potentially expensive lookahead and backtracking, we
michael@0 809 // only check for this case if we encounter a '#' character.
michael@0 810
michael@0 811 if (!getDisplayURL(isMultiline, shouldWarnDeprecated))
michael@0 812 return false;
michael@0 813 if (!getSourceMappingURL(isMultiline, shouldWarnDeprecated))
michael@0 814 return false;
michael@0 815
michael@0 816 return true;
michael@0 817 }
michael@0 818
michael@0 819 bool
michael@0 820 TokenStream::getDirective(bool isMultiline, bool shouldWarnDeprecated,
michael@0 821 const char *directive, int directiveLength,
michael@0 822 const char *errorMsgPragma, jschar **destination) {
michael@0 823 JS_ASSERT(directiveLength <= 18);
michael@0 824 jschar peeked[18];
michael@0 825 int32_t c;
michael@0 826
michael@0 827 if (peekChars(directiveLength, peeked) && CharsMatch(peeked, directive)) {
michael@0 828 if (shouldWarnDeprecated &&
michael@0 829 !reportWarning(JSMSG_DEPRECATED_PRAGMA, errorMsgPragma))
michael@0 830 return false;
michael@0 831
michael@0 832 skipChars(directiveLength);
michael@0 833 tokenbuf.clear();
michael@0 834
michael@0 835 while ((c = peekChar()) && c != EOF && !IsSpaceOrBOM2(c)) {
michael@0 836 getChar();
michael@0 837 // Debugging directives can occur in both single- and multi-line
michael@0 838 // comments. If we're currently inside a multi-line comment, we also
michael@0 839 // need to recognize multi-line comment terminators.
michael@0 840 if (isMultiline && c == '*' && peekChar() == '/') {
michael@0 841 ungetChar('*');
michael@0 842 break;
michael@0 843 }
michael@0 844 tokenbuf.append(c);
michael@0 845 }
michael@0 846
michael@0 847 if (tokenbuf.empty())
michael@0 848 // The directive's URL was missing, but this is not quite an
michael@0 849 // exception that we should stop and drop everything for.
michael@0 850 return true;
michael@0 851
michael@0 852 size_t length = tokenbuf.length();
michael@0 853
michael@0 854 js_free(*destination);
michael@0 855 *destination = cx->pod_malloc<jschar>(length + 1);
michael@0 856 if (!*destination)
michael@0 857 return false;
michael@0 858
michael@0 859 PodCopy(*destination, tokenbuf.begin(), length);
michael@0 860 (*destination)[length] = '\0';
michael@0 861 }
michael@0 862
michael@0 863 return true;
michael@0 864 }
michael@0 865
michael@0 866 bool
michael@0 867 TokenStream::getDisplayURL(bool isMultiline, bool shouldWarnDeprecated)
michael@0 868 {
michael@0 869 // Match comments of the form "//# sourceURL=<url>" or
michael@0 870 // "/\* //# sourceURL=<url> *\/"
michael@0 871 //
michael@0 872 // Note that while these are labeled "sourceURL" in the source text,
michael@0 873 // internally we refer to it as a "displayURL" to distinguish what the
michael@0 874 // developer would like to refer to the source as from the source's actual
michael@0 875 // URL.
michael@0 876
michael@0 877 return getDirective(isMultiline, shouldWarnDeprecated, " sourceURL=", 11,
michael@0 878 "sourceURL", &displayURL_);
michael@0 879 }
michael@0 880
michael@0 881 bool
michael@0 882 TokenStream::getSourceMappingURL(bool isMultiline, bool shouldWarnDeprecated)
michael@0 883 {
michael@0 884 // Match comments of the form "//# sourceMappingURL=<url>" or
michael@0 885 // "/\* //# sourceMappingURL=<url> *\/"
michael@0 886
michael@0 887 return getDirective(isMultiline, shouldWarnDeprecated, " sourceMappingURL=", 18,
michael@0 888 "sourceMappingURL", &sourceMapURL_);
michael@0 889 }
michael@0 890
michael@0 891 MOZ_ALWAYS_INLINE Token *
michael@0 892 TokenStream::newToken(ptrdiff_t adjust)
michael@0 893 {
michael@0 894 cursor = (cursor + 1) & ntokensMask;
michael@0 895 Token *tp = &tokens[cursor];
michael@0 896 tp->pos.begin = userbuf.addressOfNextRawChar() + adjust - userbuf.base();
michael@0 897
michael@0 898 // NOTE: tp->pos.end is not set until the very end of getTokenInternal().
michael@0 899 MOZ_MAKE_MEM_UNDEFINED(&tp->pos.end, sizeof(tp->pos.end));
michael@0 900
michael@0 901 return tp;
michael@0 902 }
michael@0 903
michael@0 904 MOZ_ALWAYS_INLINE JSAtom *
michael@0 905 TokenStream::atomize(ExclusiveContext *cx, CharBuffer &cb)
michael@0 906 {
michael@0 907 return AtomizeChars(cx, cb.begin(), cb.length());
michael@0 908 }
michael@0 909
#ifdef DEBUG
// Debug-only consistency check: the token kind must lie in
// [TOK_ERROR, TOK_LIMIT) and the token must not end before it begins.
static bool
IsTokenSane(Token *tp)
{
    // Nb: TOK_EOL should never be used in an actual Token; it should only be
    // returned as a TokenKind from peekTokenSameLine().
    bool kindInRange = tp->type >= TOK_ERROR && tp->type < TOK_LIMIT;
    if (!kindInRange || tp->type == TOK_EOL)
        return false;

    return tp->pos.end >= tp->pos.begin;
}
#endif
michael@0 925
michael@0 926 bool
michael@0 927 TokenStream::putIdentInTokenbuf(const jschar *identStart)
michael@0 928 {
michael@0 929 int32_t c, qc;
michael@0 930 const jschar *tmp = userbuf.addressOfNextRawChar();
michael@0 931 userbuf.setAddressOfNextRawChar(identStart);
michael@0 932
michael@0 933 tokenbuf.clear();
michael@0 934 for (;;) {
michael@0 935 c = getCharIgnoreEOL();
michael@0 936 if (!IsIdentifierPart(c)) {
michael@0 937 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
michael@0 938 break;
michael@0 939 c = qc;
michael@0 940 }
michael@0 941 if (!tokenbuf.append(c)) {
michael@0 942 userbuf.setAddressOfNextRawChar(tmp);
michael@0 943 return false;
michael@0 944 }
michael@0 945 }
michael@0 946 userbuf.setAddressOfNextRawChar(tmp);
michael@0 947 return true;
michael@0 948 }
michael@0 949
michael@0 950 bool
michael@0 951 TokenStream::checkForKeyword(const jschar *s, size_t length, TokenKind *ttp)
michael@0 952 {
michael@0 953 const KeywordInfo *kw = FindKeyword(s, length);
michael@0 954 if (!kw)
michael@0 955 return true;
michael@0 956
michael@0 957 if (kw->tokentype == TOK_RESERVED)
michael@0 958 return reportError(JSMSG_RESERVED_ID, kw->chars);
michael@0 959
michael@0 960 if (kw->tokentype != TOK_STRICT_RESERVED) {
michael@0 961 if (kw->version <= versionNumber()) {
michael@0 962 // Working keyword.
michael@0 963 if (ttp) {
michael@0 964 *ttp = kw->tokentype;
michael@0 965 return true;
michael@0 966 }
michael@0 967 return reportError(JSMSG_RESERVED_ID, kw->chars);
michael@0 968 }
michael@0 969
michael@0 970 // The keyword is not in this version. Treat it as an identifier, unless
michael@0 971 // it is let which we treat as TOK_STRICT_RESERVED by falling through to
michael@0 972 // the code below (ES5 forbids it in strict mode).
michael@0 973 if (kw->tokentype != TOK_LET)
michael@0 974 return true;
michael@0 975 }
michael@0 976
michael@0 977 // Strict reserved word.
michael@0 978 return reportStrictModeError(JSMSG_RESERVED_ID, kw->chars);
michael@0 979 }
michael@0 980
michael@0 981 enum FirstCharKind {
michael@0 982 // A jschar has the 'OneChar' kind if it, by itself, constitutes a valid
michael@0 983 // token that cannot also be a prefix of a longer token. E.g. ';' has the
michael@0 984 // OneChar kind, but '+' does not, because '++' and '+=' are valid longer tokens
michael@0 985 // that begin with '+'.
michael@0 986 //
michael@0 987 // The few token kinds satisfying these properties cover roughly 35--45%
michael@0 988 // of the tokens seen in practice.
michael@0 989 //
michael@0 990 // We represent the 'OneChar' kind with any value in the range
michael@0 991 // [OneChar_Min, OneChar_Max], i.e. any value below TOK_LIMIT. This
michael@0 992 // representation lets us associate each one-char token jschar with a
michael@0 993 // TokenKind and thus avoid a subsequent jschar-to-TokenKind conversion.
michael@0 994 OneChar_Min = 0,
michael@0 995 OneChar_Max = TOK_LIMIT - 1,
michael@0 996
michael@0 997 Space = TOK_LIMIT, // non-EOL whitespace: '\t', '\v', '\f', ' '
michael@0 998 Ident, // identifier start: '$', '_', 'A'..'Z', 'a'..'z'
michael@0 999 Dec, // non-zero decimal digit: '1'..'9'
michael@0 1000 String, // string delimiter: '"' or '\''
michael@0 1001 EOL, // line terminator: '\n' or '\r'
michael@0 1002 BasePrefix, // '0', which may begin a prefixed (e.g. 0x) literal
michael@0 1003 Other, // every remaining character below 128
michael@0 1004
michael@0 1005 LastCharKind = Other
michael@0 1006 };
michael@0 1007
michael@0 1008 // OneChar: 40, 41, 44, 58, 59, 63, 91, 93, 123, 125, 126:
michael@0 1009 // '(', ')', ',', ':', ';', '?', '[', ']', '{', '}', '~'
michael@0 1010 // Ident: 36, 65..90, 95, 97..122: '$', 'A'..'Z', '_', 'a'..'z'
michael@0 1011 // '.' (46): no dedicated kind; classified as Other in the table below
michael@0 1012 // '=' (61): no dedicated kind; classified as Other in the table below
michael@0 1013 // String: 34, 39: '"', '\''
michael@0 1014 // Dec: 49..57: '1'..'9'
michael@0 1015 // '+' (43): no dedicated kind; classified as Other in the table below
michael@0 1016 // BasePrefix: 48: '0'
michael@0 1017 // Space: 9, 11, 12, 32: '\t', '\v', '\f', ' '
michael@0 1018 // EOL: 10, 13: '\n', '\r'
michael@0 1019 // Other: every remaining code below 128, written as _______ in the table.
michael@0 1020 #define T_COMMA TOK_COMMA
michael@0 1021 #define T_COLON TOK_COLON
michael@0 1022 #define T_BITNOT TOK_BITNOT
michael@0 1023 #define _______ Other
michael@0 1024 static const uint8_t firstCharKinds[] = {
michael@0 1025 /* 0 1 2 3 4 5 6 7 8 9 */
michael@0 1026 /* 0+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, Space,
michael@0 1027 /* 10+ */ EOL, Space, Space, EOL, _______, _______, _______, _______, _______, _______,
michael@0 1028 /* 20+ */ _______, _______, _______, _______, _______, _______, _______, _______, _______, _______,
michael@0 1029 /* 30+ */ _______, _______, Space, _______, String, _______, Ident, _______, _______, String,
michael@0 1030 /* 40+ */ TOK_LP, TOK_RP, _______, _______, T_COMMA,_______, _______, _______,BasePrefix, Dec,
michael@0 1031 /* 50+ */ Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, T_COLON,TOK_SEMI,
michael@0 1032 /* 60+ */ _______, _______, _______,TOK_HOOK, _______, Ident, Ident, Ident, Ident, Ident,
michael@0 1033 /* 70+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
michael@0 1034 /* 80+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
michael@0 1035 /* 90+ */ Ident, TOK_LB, _______, TOK_RB, _______, Ident, _______, Ident, Ident, Ident,
michael@0 1036 /* 100+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
michael@0 1037 /* 110+ */ Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident, Ident,
michael@0 1038 /* 120+ */ Ident, Ident, Ident, TOK_LC, _______, TOK_RC,T_BITNOT, _______
michael@0 1039 };
michael@0 1040 #undef T_COMMA
michael@0 1041 #undef T_COLON
michael@0 1042 #undef T_BITNOT
michael@0 1043 #undef _______
michael@0 1044
michael@0 1045 static_assert(LastCharKind < (1 << (sizeof(firstCharKinds[0]) * 8)),
michael@0 1046 "Elements of firstCharKinds[] are too small");
michael@0 1047
michael@0 1048 TokenKind
michael@0 1049 TokenStream::getTokenInternal(Modifier modifier)
michael@0 1050 {
michael@0 1051 int c, qc;
michael@0 1052 Token *tp;
michael@0 1053 FirstCharKind c1kind;
michael@0 1054 const jschar *numStart;
michael@0 1055 bool hasExp;
michael@0 1056 DecimalPoint decimalPoint;
michael@0 1057 const jschar *identStart;
michael@0 1058 bool hadUnicodeEscape;
michael@0 1059
michael@0 1060 retry:
michael@0 1061 if (MOZ_UNLIKELY(!userbuf.hasRawChars())) {
michael@0 1062 tp = newToken(0);
michael@0 1063 tp->type = TOK_EOF;
michael@0 1064 flags.isEOF = true;
michael@0 1065 goto out;
michael@0 1066 }
michael@0 1067
michael@0 1068 c = userbuf.getRawChar();
michael@0 1069 JS_ASSERT(c != EOF);
michael@0 1070
michael@0 1071 // Chars not in the range 0..127 are rare. Getting them out of the way
michael@0 1072 // early allows subsequent checking to be faster.
michael@0 1073 if (MOZ_UNLIKELY(c >= 128)) {
michael@0 1074 if (IsSpaceOrBOM2(c)) {
michael@0 1075 if (c == LINE_SEPARATOR || c == PARA_SEPARATOR) {
michael@0 1076 updateLineInfoForEOL();
michael@0 1077 updateFlagsForEOL();
michael@0 1078 }
michael@0 1079
michael@0 1080 goto retry;
michael@0 1081 }
michael@0 1082
michael@0 1083 tp = newToken(-1);
michael@0 1084
michael@0 1085 // '$' and '_' don't pass IsLetter, but they're < 128 so never appear here.
michael@0 1086 JS_STATIC_ASSERT('$' < 128 && '_' < 128);
michael@0 1087 if (IsLetter(c)) {
michael@0 1088 identStart = userbuf.addressOfNextRawChar() - 1;
michael@0 1089 hadUnicodeEscape = false;
michael@0 1090 goto identifier;
michael@0 1091 }
michael@0 1092
michael@0 1093 goto badchar;
michael@0 1094 }
michael@0 1095
michael@0 1096 // Get the token kind, based on the first char. The ordering of c1kind
michael@0 1097 // comparison is based on the frequency of tokens in real code -- Parsemark
michael@0 1098 // (which represents typical JS code on the web) and the Unreal demo (which
michael@0 1099 // represents asm.js code).
michael@0 1100 //
michael@0 1101 // Parsemark Unreal
michael@0 1102 // OneChar 32.9% 39.7%
michael@0 1103 // Space 25.0% 0.6%
michael@0 1104 // Ident 19.2% 36.4%
michael@0 1105 // Dec 7.2% 5.1%
michael@0 1106 // String 7.9% 0.0%
michael@0 1107 // EOL 1.7% 0.0%
michael@0 1108 // BasePrefix 0.4% 4.9%
michael@0 1109 // Other 5.7% 13.3%
michael@0 1110 //
michael@0 1111 // The ordering is based mostly only Parsemark frequencies, with Unreal
michael@0 1112 // frequencies used to break close categories (e.g. |Dec| and |String|).
michael@0 1113 // |Other| is biggish, but no other token kind is common enough for it to
michael@0 1114 // be worth adding extra values to FirstCharKind.
michael@0 1115 //
michael@0 1116 c1kind = FirstCharKind(firstCharKinds[c]);
michael@0 1117
michael@0 1118 // Look for an unambiguous single-char token.
michael@0 1119 //
michael@0 1120 if (c1kind <= OneChar_Max) {
michael@0 1121 tp = newToken(-1);
michael@0 1122 tp->type = TokenKind(c1kind);
michael@0 1123 goto out;
michael@0 1124 }
michael@0 1125
michael@0 1126 // Skip over non-EOL whitespace chars.
michael@0 1127 //
michael@0 1128 if (c1kind == Space)
michael@0 1129 goto retry;
michael@0 1130
michael@0 1131 // Look for an identifier.
michael@0 1132 //
michael@0 1133 if (c1kind == Ident) {
michael@0 1134 tp = newToken(-1);
michael@0 1135 identStart = userbuf.addressOfNextRawChar() - 1;
michael@0 1136 hadUnicodeEscape = false;
michael@0 1137
michael@0 1138 identifier:
michael@0 1139 for (;;) {
michael@0 1140 c = getCharIgnoreEOL();
michael@0 1141 if (c == EOF)
michael@0 1142 break;
michael@0 1143 if (!IsIdentifierPart(c)) {
michael@0 1144 if (c != '\\' || !matchUnicodeEscapeIdent(&qc))
michael@0 1145 break;
michael@0 1146 hadUnicodeEscape = true;
michael@0 1147 }
michael@0 1148 }
michael@0 1149 ungetCharIgnoreEOL(c);
michael@0 1150
michael@0 1151 // Identifiers containing no Unicode escapes can be processed directly
michael@0 1152 // from userbuf. The rest must use the escapes converted via tokenbuf
michael@0 1153 // before atomizing.
michael@0 1154 const jschar *chars;
michael@0 1155 size_t length;
michael@0 1156 if (hadUnicodeEscape) {
michael@0 1157 if (!putIdentInTokenbuf(identStart))
michael@0 1158 goto error;
michael@0 1159
michael@0 1160 chars = tokenbuf.begin();
michael@0 1161 length = tokenbuf.length();
michael@0 1162 } else {
michael@0 1163 chars = identStart;
michael@0 1164 length = userbuf.addressOfNextRawChar() - identStart;
michael@0 1165 }
michael@0 1166
michael@0 1167 // Check for keywords unless the parser told us not to.
michael@0 1168 if (modifier != KeywordIsName) {
michael@0 1169 tp->type = TOK_NAME;
michael@0 1170 if (!checkForKeyword(chars, length, &tp->type))
michael@0 1171 goto error;
michael@0 1172 if (tp->type != TOK_NAME)
michael@0 1173 goto out;
michael@0 1174 }
michael@0 1175
michael@0 1176 JSAtom *atom = AtomizeChars(cx, chars, length);
michael@0 1177 if (!atom)
michael@0 1178 goto error;
michael@0 1179 tp->type = TOK_NAME;
michael@0 1180 tp->setName(atom->asPropertyName());
michael@0 1181 goto out;
michael@0 1182 }
michael@0 1183
michael@0 1184 // Look for a decimal number.
michael@0 1185 //
michael@0 1186 if (c1kind == Dec) {
michael@0 1187 tp = newToken(-1);
michael@0 1188 numStart = userbuf.addressOfNextRawChar() - 1;
michael@0 1189
michael@0 1190 decimal:
michael@0 1191 decimalPoint = NoDecimal;
michael@0 1192 hasExp = false;
michael@0 1193 while (JS7_ISDEC(c))
michael@0 1194 c = getCharIgnoreEOL();
michael@0 1195
michael@0 1196 if (c == '.') {
michael@0 1197 decimalPoint = HasDecimal;
michael@0 1198 decimal_dot:
michael@0 1199 do {
michael@0 1200 c = getCharIgnoreEOL();
michael@0 1201 } while (JS7_ISDEC(c));
michael@0 1202 }
michael@0 1203 if (c == 'e' || c == 'E') {
michael@0 1204 hasExp = true;
michael@0 1205 c = getCharIgnoreEOL();
michael@0 1206 if (c == '+' || c == '-')
michael@0 1207 c = getCharIgnoreEOL();
michael@0 1208 if (!JS7_ISDEC(c)) {
michael@0 1209 ungetCharIgnoreEOL(c);
michael@0 1210 reportError(JSMSG_MISSING_EXPONENT);
michael@0 1211 goto error;
michael@0 1212 }
michael@0 1213 do {
michael@0 1214 c = getCharIgnoreEOL();
michael@0 1215 } while (JS7_ISDEC(c));
michael@0 1216 }
michael@0 1217 ungetCharIgnoreEOL(c);
michael@0 1218
michael@0 1219 if (c != EOF && IsIdentifierStart(c)) {
michael@0 1220 reportError(JSMSG_IDSTART_AFTER_NUMBER);
michael@0 1221 goto error;
michael@0 1222 }
michael@0 1223
michael@0 1224 // Unlike identifiers and strings, numbers cannot contain escaped
michael@0 1225 // chars, so we don't need to use tokenbuf. Instead we can just
michael@0 1226 // convert the jschars in userbuf directly to the numeric value.
michael@0 1227 double dval;
michael@0 1228 if (!((decimalPoint == HasDecimal) || hasExp)) {
michael@0 1229 if (!GetDecimalInteger(cx, numStart, userbuf.addressOfNextRawChar(), &dval))
michael@0 1230 goto error;
michael@0 1231 } else {
michael@0 1232 const jschar *dummy;
michael@0 1233 if (!js_strtod(cx, numStart, userbuf.addressOfNextRawChar(), &dummy, &dval))
michael@0 1234 goto error;
michael@0 1235 }
michael@0 1236 tp->type = TOK_NUMBER;
michael@0 1237 tp->setNumber(dval, decimalPoint);
michael@0 1238 goto out;
michael@0 1239 }
michael@0 1240
michael@0 1241 // Look for a string.
michael@0 1242 //
michael@0 1243 if (c1kind == String) {
michael@0 1244 tp = newToken(-1);
michael@0 1245 qc = c;
michael@0 1246 tokenbuf.clear();
michael@0 1247 while (true) {
michael@0 1248 // We need to detect any of these chars: " or ', \n (or its
michael@0 1249 // equivalents), \\, EOF. We use maybeStrSpecial[] in a manner
michael@0 1250 // similar to maybeEOL[], see above. Because we detect EOL
michael@0 1251 // sequences here and put them back immediately, we can use
michael@0 1252 // getCharIgnoreEOL().
michael@0 1253 c = getCharIgnoreEOL();
michael@0 1254 if (maybeStrSpecial[c & 0xff]) {
michael@0 1255 if (c == qc)
michael@0 1256 break;
michael@0 1257 if (c == '\\') {
michael@0 1258 switch (c = getChar()) {
michael@0 1259 case 'b': c = '\b'; break;
michael@0 1260 case 'f': c = '\f'; break;
michael@0 1261 case 'n': c = '\n'; break;
michael@0 1262 case 'r': c = '\r'; break;
michael@0 1263 case 't': c = '\t'; break;
michael@0 1264 case 'v': c = '\v'; break;
michael@0 1265
michael@0 1266 default:
michael@0 1267 if ('0' <= c && c < '8') {
michael@0 1268 int32_t val = JS7_UNDEC(c);
michael@0 1269
michael@0 1270 c = peekChar();
michael@0 1271 // Strict mode code allows only \0, then a non-digit.
michael@0 1272 if (val != 0 || JS7_ISDEC(c)) {
michael@0 1273 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
michael@0 1274 goto error;
michael@0 1275 flags.sawOctalEscape = true;
michael@0 1276 }
michael@0 1277 if ('0' <= c && c < '8') {
michael@0 1278 val = 8 * val + JS7_UNDEC(c);
michael@0 1279 getChar();
michael@0 1280 c = peekChar();
michael@0 1281 if ('0' <= c && c < '8') {
michael@0 1282 int32_t save = val;
michael@0 1283 val = 8 * val + JS7_UNDEC(c);
michael@0 1284 if (val <= 0377)
michael@0 1285 getChar();
michael@0 1286 else
michael@0 1287 val = save;
michael@0 1288 }
michael@0 1289 }
michael@0 1290
michael@0 1291 c = jschar(val);
michael@0 1292 } else if (c == 'u') {
michael@0 1293 jschar cp[4];
michael@0 1294 if (peekChars(4, cp) &&
michael@0 1295 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
michael@0 1296 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
michael@0 1297 c = (((((JS7_UNHEX(cp[0]) << 4)
michael@0 1298 + JS7_UNHEX(cp[1])) << 4)
michael@0 1299 + JS7_UNHEX(cp[2])) << 4)
michael@0 1300 + JS7_UNHEX(cp[3]);
michael@0 1301 skipChars(4);
michael@0 1302 } else {
michael@0 1303 reportError(JSMSG_MALFORMED_ESCAPE, "Unicode");
michael@0 1304 goto error;
michael@0 1305 }
michael@0 1306 } else if (c == 'x') {
michael@0 1307 jschar cp[2];
michael@0 1308 if (peekChars(2, cp) &&
michael@0 1309 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
michael@0 1310 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
michael@0 1311 skipChars(2);
michael@0 1312 } else {
michael@0 1313 reportError(JSMSG_MALFORMED_ESCAPE, "hexadecimal");
michael@0 1314 goto error;
michael@0 1315 }
michael@0 1316 } else if (c == '\n') {
michael@0 1317 // ES5 7.8.4: an escaped line terminator represents
michael@0 1318 // no character.
michael@0 1319 continue;
michael@0 1320 }
michael@0 1321 break;
michael@0 1322 }
michael@0 1323 } else if (TokenBuf::isRawEOLChar(c) || c == EOF) {
michael@0 1324 ungetCharIgnoreEOL(c);
michael@0 1325 reportError(JSMSG_UNTERMINATED_STRING);
michael@0 1326 goto error;
michael@0 1327 }
michael@0 1328 }
michael@0 1329 if (!tokenbuf.append(c))
michael@0 1330 goto error;
michael@0 1331 }
michael@0 1332 JSAtom *atom = atomize(cx, tokenbuf);
michael@0 1333 if (!atom)
michael@0 1334 goto error;
michael@0 1335 tp->type = TOK_STRING;
michael@0 1336 tp->setAtom(atom);
michael@0 1337 goto out;
michael@0 1338 }
michael@0 1339
michael@0 1340 // Skip over EOL chars, updating line state along the way.
michael@0 1341 //
michael@0 1342 if (c1kind == EOL) {
michael@0 1343 // If it's a \r\n sequence: treat as a single EOL, skip over the \n.
michael@0 1344 if (c == '\r' && userbuf.hasRawChars())
michael@0 1345 userbuf.matchRawChar('\n');
michael@0 1346 updateLineInfoForEOL();
michael@0 1347 updateFlagsForEOL();
michael@0 1348 goto retry;
michael@0 1349 }
michael@0 1350
michael@0 1351 // Look for a hexadecimal, octal, or binary number.
michael@0 1352 //
michael@0 1353 if (c1kind == BasePrefix) {
michael@0 1354 tp = newToken(-1);
michael@0 1355 int radix;
michael@0 1356 c = getCharIgnoreEOL();
michael@0 1357 if (c == 'x' || c == 'X') {
michael@0 1358 radix = 16;
michael@0 1359 c = getCharIgnoreEOL();
michael@0 1360 if (!JS7_ISHEX(c)) {
michael@0 1361 ungetCharIgnoreEOL(c);
michael@0 1362 reportError(JSMSG_MISSING_HEXDIGITS);
michael@0 1363 goto error;
michael@0 1364 }
michael@0 1365 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0x'
michael@0 1366 while (JS7_ISHEX(c))
michael@0 1367 c = getCharIgnoreEOL();
michael@0 1368 } else if (c == 'b' || c == 'B') {
michael@0 1369 radix = 2;
michael@0 1370 c = getCharIgnoreEOL();
michael@0 1371 if (c != '0' && c != '1') {
michael@0 1372 ungetCharIgnoreEOL(c);
michael@0 1373 reportError(JSMSG_MISSING_BINARY_DIGITS);
michael@0 1374 goto error;
michael@0 1375 }
michael@0 1376 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0b'
michael@0 1377 while (c == '0' || c == '1')
michael@0 1378 c = getCharIgnoreEOL();
michael@0 1379 } else if (c == 'o' || c == 'O') {
michael@0 1380 radix = 8;
michael@0 1381 c = getCharIgnoreEOL();
michael@0 1382 if (c < '0' || c > '7') {
michael@0 1383 ungetCharIgnoreEOL(c);
michael@0 1384 reportError(JSMSG_MISSING_OCTAL_DIGITS);
michael@0 1385 goto error;
michael@0 1386 }
michael@0 1387 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0o'
michael@0 1388 while ('0' <= c && c <= '7')
michael@0 1389 c = getCharIgnoreEOL();
michael@0 1390 } else if (JS7_ISDEC(c)) {
michael@0 1391 radix = 8;
michael@0 1392 numStart = userbuf.addressOfNextRawChar() - 1; // one past the '0'
michael@0 1393 while (JS7_ISDEC(c)) {
michael@0 1394 // Octal integer literals are not permitted in strict mode code.
michael@0 1395 if (!reportStrictModeError(JSMSG_DEPRECATED_OCTAL))
michael@0 1396 goto error;
michael@0 1397
michael@0 1398 // Outside strict mode, we permit 08 and 09 as decimal numbers,
michael@0 1399 // which makes our behaviour a superset of the ECMA numeric
michael@0 1400 // grammar. We might not always be so permissive, so we warn
michael@0 1401 // about it.
michael@0 1402 if (c >= '8') {
michael@0 1403 if (!reportWarning(JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
michael@0 1404 goto error;
michael@0 1405 }
michael@0 1406 goto decimal; // use the decimal scanner for the rest of the number
michael@0 1407 }
michael@0 1408 c = getCharIgnoreEOL();
michael@0 1409 }
michael@0 1410 } else {
michael@0 1411 // '0' not followed by 'x', 'X' or a digit; scan as a decimal number.
michael@0 1412 numStart = userbuf.addressOfNextRawChar() - 1;
michael@0 1413 goto decimal;
michael@0 1414 }
michael@0 1415 ungetCharIgnoreEOL(c);
michael@0 1416
michael@0 1417 if (c != EOF && IsIdentifierStart(c)) {
michael@0 1418 reportError(JSMSG_IDSTART_AFTER_NUMBER);
michael@0 1419 goto error;
michael@0 1420 }
michael@0 1421
michael@0 1422 double dval;
michael@0 1423 const jschar *dummy;
michael@0 1424 if (!GetPrefixInteger(cx, numStart, userbuf.addressOfNextRawChar(), radix, &dummy, &dval))
michael@0 1425 goto error;
michael@0 1426 tp->type = TOK_NUMBER;
michael@0 1427 tp->setNumber(dval, NoDecimal);
michael@0 1428 goto out;
michael@0 1429 }
michael@0 1430
michael@0 1431 // This handles everything else.
michael@0 1432 //
michael@0 1433 JS_ASSERT(c1kind == Other);
michael@0 1434 tp = newToken(-1);
michael@0 1435 switch (c) {
michael@0 1436 case '.':
michael@0 1437 c = getCharIgnoreEOL();
michael@0 1438 if (JS7_ISDEC(c)) {
michael@0 1439 numStart = userbuf.addressOfNextRawChar() - 2;
michael@0 1440 decimalPoint = HasDecimal;
michael@0 1441 hasExp = false;
michael@0 1442 goto decimal_dot;
michael@0 1443 }
michael@0 1444 if (c == '.') {
michael@0 1445 if (matchChar('.')) {
michael@0 1446 tp->type = TOK_TRIPLEDOT;
michael@0 1447 goto out;
michael@0 1448 }
michael@0 1449 }
michael@0 1450 ungetCharIgnoreEOL(c);
michael@0 1451 tp->type = TOK_DOT;
michael@0 1452 goto out;
michael@0 1453
michael@0 1454 case '=':
michael@0 1455 if (matchChar('='))
michael@0 1456 tp->type = matchChar('=') ? TOK_STRICTEQ : TOK_EQ;
michael@0 1457 else if (matchChar('>'))
michael@0 1458 tp->type = TOK_ARROW;
michael@0 1459 else
michael@0 1460 tp->type = TOK_ASSIGN;
michael@0 1461 goto out;
michael@0 1462
michael@0 1463 case '+':
michael@0 1464 if (matchChar('+'))
michael@0 1465 tp->type = TOK_INC;
michael@0 1466 else
michael@0 1467 tp->type = matchChar('=') ? TOK_ADDASSIGN : TOK_ADD;
michael@0 1468 goto out;
michael@0 1469
michael@0 1470 case '\\':
michael@0 1471 hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc);
michael@0 1472 if (hadUnicodeEscape) {
michael@0 1473 identStart = userbuf.addressOfNextRawChar() - 6;
michael@0 1474 goto identifier;
michael@0 1475 }
michael@0 1476 goto badchar;
michael@0 1477
michael@0 1478 case '|':
michael@0 1479 if (matchChar('|'))
michael@0 1480 tp->type = TOK_OR;
michael@0 1481 else
michael@0 1482 tp->type = matchChar('=') ? TOK_BITORASSIGN : TOK_BITOR;
michael@0 1483 goto out;
michael@0 1484
michael@0 1485 case '^':
michael@0 1486 tp->type = matchChar('=') ? TOK_BITXORASSIGN : TOK_BITXOR;
michael@0 1487 goto out;
michael@0 1488
michael@0 1489 case '&':
michael@0 1490 if (matchChar('&'))
michael@0 1491 tp->type = TOK_AND;
michael@0 1492 else
michael@0 1493 tp->type = matchChar('=') ? TOK_BITANDASSIGN : TOK_BITAND;
michael@0 1494 goto out;
michael@0 1495
michael@0 1496 case '!':
michael@0 1497 if (matchChar('='))
michael@0 1498 tp->type = matchChar('=') ? TOK_STRICTNE : TOK_NE;
michael@0 1499 else
michael@0 1500 tp->type = TOK_NOT;
michael@0 1501 goto out;
michael@0 1502
michael@0 1503 case '<':
michael@0 1504 // NB: treat HTML begin-comment as comment-till-end-of-line.
michael@0 1505 if (matchChar('!')) {
michael@0 1506 if (matchChar('-')) {
michael@0 1507 if (matchChar('-'))
michael@0 1508 goto skipline;
michael@0 1509 ungetChar('-');
michael@0 1510 }
michael@0 1511 ungetChar('!');
michael@0 1512 }
michael@0 1513 if (matchChar('<')) {
michael@0 1514 tp->type = matchChar('=') ? TOK_LSHASSIGN : TOK_LSH;
michael@0 1515 } else {
michael@0 1516 tp->type = matchChar('=') ? TOK_LE : TOK_LT;
michael@0 1517 }
michael@0 1518 goto out;
michael@0 1519
michael@0 1520 case '>':
michael@0 1521 if (matchChar('>')) {
michael@0 1522 if (matchChar('>'))
michael@0 1523 tp->type = matchChar('=') ? TOK_URSHASSIGN : TOK_URSH;
michael@0 1524 else
michael@0 1525 tp->type = matchChar('=') ? TOK_RSHASSIGN : TOK_RSH;
michael@0 1526 } else {
michael@0 1527 tp->type = matchChar('=') ? TOK_GE : TOK_GT;
michael@0 1528 }
michael@0 1529 goto out;
michael@0 1530
michael@0 1531 case '*':
michael@0 1532 tp->type = matchChar('=') ? TOK_MULASSIGN : TOK_MUL;
michael@0 1533 goto out;
michael@0 1534
michael@0 1535 case '/':
michael@0 1536 // Look for a single-line comment.
michael@0 1537 if (matchChar('/')) {
michael@0 1538 c = peekChar();
michael@0 1539 if (c == '@' || c == '#') {
michael@0 1540 bool shouldWarn = getChar() == '@';
michael@0 1541 if (!getDirectives(false, shouldWarn))
michael@0 1542 goto error;
michael@0 1543 }
michael@0 1544
michael@0 1545 skipline:
michael@0 1546 while ((c = getChar()) != EOF && c != '\n')
michael@0 1547 continue;
michael@0 1548 ungetChar(c);
michael@0 1549 cursor = (cursor - 1) & ntokensMask;
michael@0 1550 goto retry;
michael@0 1551 }
michael@0 1552
michael@0 1553 // Look for a multi-line comment.
michael@0 1554 if (matchChar('*')) {
michael@0 1555 unsigned linenoBefore = lineno;
michael@0 1556 while ((c = getChar()) != EOF &&
michael@0 1557 !(c == '*' && matchChar('/'))) {
michael@0 1558 if (c == '@' || c == '#') {
michael@0 1559 bool shouldWarn = c == '@';
michael@0 1560 if (!getDirectives(true, shouldWarn))
michael@0 1561 goto error;
michael@0 1562 }
michael@0 1563 }
michael@0 1564 if (c == EOF) {
michael@0 1565 reportError(JSMSG_UNTERMINATED_COMMENT);
michael@0 1566 goto error;
michael@0 1567 }
michael@0 1568 if (linenoBefore != lineno)
michael@0 1569 updateFlagsForEOL();
michael@0 1570 cursor = (cursor - 1) & ntokensMask;
michael@0 1571 goto retry;
michael@0 1572 }
michael@0 1573
michael@0 1574 // Look for a regexp.
michael@0 1575 if (modifier == Operand) {
michael@0 1576 tokenbuf.clear();
michael@0 1577
michael@0 1578 bool inCharClass = false;
michael@0 1579 for (;;) {
michael@0 1580 c = getChar();
michael@0 1581 if (c == '\\') {
michael@0 1582 if (!tokenbuf.append(c))
michael@0 1583 goto error;
michael@0 1584 c = getChar();
michael@0 1585 } else if (c == '[') {
michael@0 1586 inCharClass = true;
michael@0 1587 } else if (c == ']') {
michael@0 1588 inCharClass = false;
michael@0 1589 } else if (c == '/' && !inCharClass) {
michael@0 1590 // For compat with IE, allow unescaped / in char classes.
michael@0 1591 break;
michael@0 1592 }
michael@0 1593 if (c == '\n' || c == EOF) {
michael@0 1594 ungetChar(c);
michael@0 1595 reportError(JSMSG_UNTERMINATED_REGEXP);
michael@0 1596 goto error;
michael@0 1597 }
michael@0 1598 if (!tokenbuf.append(c))
michael@0 1599 goto error;
michael@0 1600 }
michael@0 1601
michael@0 1602 RegExpFlag reflags = NoFlags;
michael@0 1603 unsigned length = tokenbuf.length() + 1;
michael@0 1604 while (true) {
michael@0 1605 c = peekChar();
michael@0 1606 if (c == 'g' && !(reflags & GlobalFlag))
michael@0 1607 reflags = RegExpFlag(reflags | GlobalFlag);
michael@0 1608 else if (c == 'i' && !(reflags & IgnoreCaseFlag))
michael@0 1609 reflags = RegExpFlag(reflags | IgnoreCaseFlag);
michael@0 1610 else if (c == 'm' && !(reflags & MultilineFlag))
michael@0 1611 reflags = RegExpFlag(reflags | MultilineFlag);
michael@0 1612 else if (c == 'y' && !(reflags & StickyFlag))
michael@0 1613 reflags = RegExpFlag(reflags | StickyFlag);
michael@0 1614 else
michael@0 1615 break;
michael@0 1616 getChar();
michael@0 1617 length++;
michael@0 1618 }
michael@0 1619
michael@0 1620 c = peekChar();
michael@0 1621 if (JS7_ISLET(c)) {
michael@0 1622 char buf[2] = { '\0', '\0' };
michael@0 1623 tp->pos.begin += length + 1;
michael@0 1624 buf[0] = char(c);
michael@0 1625 reportError(JSMSG_BAD_REGEXP_FLAG, buf);
michael@0 1626 (void) getChar();
michael@0 1627 goto error;
michael@0 1628 }
michael@0 1629 tp->type = TOK_REGEXP;
michael@0 1630 tp->setRegExpFlags(reflags);
michael@0 1631 goto out;
michael@0 1632 }
michael@0 1633
michael@0 1634 tp->type = matchChar('=') ? TOK_DIVASSIGN : TOK_DIV;
michael@0 1635 goto out;
michael@0 1636
michael@0 1637 case '%':
michael@0 1638 tp->type = matchChar('=') ? TOK_MODASSIGN : TOK_MOD;
michael@0 1639 goto out;
michael@0 1640
michael@0 1641 case '-':
michael@0 1642 if (matchChar('-')) {
michael@0 1643 if (peekChar() == '>' && !flags.isDirtyLine)
michael@0 1644 goto skipline;
michael@0 1645 tp->type = TOK_DEC;
michael@0 1646 } else {
michael@0 1647 tp->type = matchChar('=') ? TOK_SUBASSIGN : TOK_SUB;
michael@0 1648 }
michael@0 1649 goto out;
michael@0 1650
michael@0 1651 badchar:
michael@0 1652 default:
michael@0 1653 reportError(JSMSG_ILLEGAL_CHARACTER);
michael@0 1654 goto error;
michael@0 1655 }
michael@0 1656
michael@0 1657 MOZ_ASSUME_UNREACHABLE("should have jumped to |out| or |error|");
michael@0 1658
michael@0 1659 out:
michael@0 1660 flags.isDirtyLine = true;
michael@0 1661 tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
michael@0 1662 JS_ASSERT(IsTokenSane(tp));
michael@0 1663 return tp->type;
michael@0 1664
michael@0 1665 error:
michael@0 1666 flags.isDirtyLine = true;
michael@0 1667 tp->pos.end = userbuf.addressOfNextRawChar() - userbuf.base();
michael@0 1668 tp->type = TOK_ERROR;
michael@0 1669 JS_ASSERT(IsTokenSane(tp));
michael@0 1670 onError();
michael@0 1671 return TOK_ERROR;
michael@0 1672 }
michael@0 1673
michael@0 1674 void
michael@0 1675 TokenStream::onError()
michael@0 1676 {
michael@0 1677 flags.hadError = true;
michael@0 1678 #ifdef DEBUG
michael@0 1679 // Poisoning userbuf on error establishes an invariant: once an erroneous
michael@0 1680 // token has been seen, userbuf will not be consulted again. This is true
michael@0 1681 // because the parser will either (a) deal with the TOK_ERROR token by
michael@0 1682 // aborting parsing immediately; or (b) if the TOK_ERROR token doesn't
michael@0 1683 // match what it expected, it will unget the token, and the next getToken()
michael@0 1684 // call will immediately return the just-gotten TOK_ERROR token again
michael@0 1685 // without consulting userbuf, thanks to the lookahead buffer.
michael@0 1686 userbuf.poison();
michael@0 1687 #endif
michael@0 1688 }
michael@0 1689
michael@0 1690 JS_FRIEND_API(int)
michael@0 1691 js_fgets(char *buf, int size, FILE *file)
michael@0 1692 {
michael@0 1693 int n, i, c;
michael@0 1694 bool crflag;
michael@0 1695
michael@0 1696 n = size - 1;
michael@0 1697 if (n < 0)
michael@0 1698 return -1;
michael@0 1699
michael@0 1700 crflag = false;
michael@0 1701 for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
michael@0 1702 buf[i] = c;
michael@0 1703 if (c == '\n') { // any \n ends a line
michael@0 1704 i++; // keep the \n; we know there is room for \0
michael@0 1705 break;
michael@0 1706 }
michael@0 1707 if (crflag) { // \r not followed by \n ends line at the \r
michael@0 1708 ungetc(c, file);
michael@0 1709 break; // and overwrite c in buf with \0
michael@0 1710 }
michael@0 1711 crflag = (c == '\r');
michael@0 1712 }
michael@0 1713
michael@0 1714 buf[i] = '\0';
michael@0 1715 return i;
michael@0 1716 }
michael@0 1717
#ifdef DEBUG
// Debug-only helper that maps a TokenKind to the spelling of its enumerator
// (e.g. TOK_SEMI -> "TOK_SEMI").  TOK_LIMIT and any value without a case
// below yield "<bad TokenKind>".
const char *
TokenKindToString(TokenKind tt)
{
    // Expands to |case TOK_FOO: return "TOK_FOO";|.  The # operator
    // stringifies the argument exactly as written.
#define TOKEN_KIND_TO_STRING_CASE(kind) case kind: return #kind;

    switch (tt) {
      TOKEN_KIND_TO_STRING_CASE(TOK_ERROR)
      TOKEN_KIND_TO_STRING_CASE(TOK_EOF)
      TOKEN_KIND_TO_STRING_CASE(TOK_EOL)
      TOKEN_KIND_TO_STRING_CASE(TOK_SEMI)
      TOKEN_KIND_TO_STRING_CASE(TOK_COMMA)
      TOKEN_KIND_TO_STRING_CASE(TOK_HOOK)
      TOKEN_KIND_TO_STRING_CASE(TOK_COLON)
      TOKEN_KIND_TO_STRING_CASE(TOK_OR)
      TOKEN_KIND_TO_STRING_CASE(TOK_AND)
      TOKEN_KIND_TO_STRING_CASE(TOK_BITOR)
      TOKEN_KIND_TO_STRING_CASE(TOK_BITXOR)
      TOKEN_KIND_TO_STRING_CASE(TOK_BITAND)
      TOKEN_KIND_TO_STRING_CASE(TOK_ADD)
      TOKEN_KIND_TO_STRING_CASE(TOK_SUB)
      TOKEN_KIND_TO_STRING_CASE(TOK_MUL)
      TOKEN_KIND_TO_STRING_CASE(TOK_DIV)
      TOKEN_KIND_TO_STRING_CASE(TOK_MOD)
      TOKEN_KIND_TO_STRING_CASE(TOK_INC)
      TOKEN_KIND_TO_STRING_CASE(TOK_DEC)
      TOKEN_KIND_TO_STRING_CASE(TOK_DOT)
      TOKEN_KIND_TO_STRING_CASE(TOK_TRIPLEDOT)
      TOKEN_KIND_TO_STRING_CASE(TOK_LB)
      TOKEN_KIND_TO_STRING_CASE(TOK_RB)
      TOKEN_KIND_TO_STRING_CASE(TOK_LC)
      TOKEN_KIND_TO_STRING_CASE(TOK_RC)
      TOKEN_KIND_TO_STRING_CASE(TOK_LP)
      TOKEN_KIND_TO_STRING_CASE(TOK_RP)
      TOKEN_KIND_TO_STRING_CASE(TOK_ARROW)
      TOKEN_KIND_TO_STRING_CASE(TOK_NAME)
      TOKEN_KIND_TO_STRING_CASE(TOK_NUMBER)
      TOKEN_KIND_TO_STRING_CASE(TOK_STRING)
      TOKEN_KIND_TO_STRING_CASE(TOK_REGEXP)
      TOKEN_KIND_TO_STRING_CASE(TOK_TRUE)
      TOKEN_KIND_TO_STRING_CASE(TOK_FALSE)
      TOKEN_KIND_TO_STRING_CASE(TOK_NULL)
      TOKEN_KIND_TO_STRING_CASE(TOK_THIS)
      TOKEN_KIND_TO_STRING_CASE(TOK_FUNCTION)
      TOKEN_KIND_TO_STRING_CASE(TOK_IF)
      TOKEN_KIND_TO_STRING_CASE(TOK_ELSE)
      TOKEN_KIND_TO_STRING_CASE(TOK_SWITCH)
      TOKEN_KIND_TO_STRING_CASE(TOK_CASE)
      TOKEN_KIND_TO_STRING_CASE(TOK_DEFAULT)
      TOKEN_KIND_TO_STRING_CASE(TOK_WHILE)
      TOKEN_KIND_TO_STRING_CASE(TOK_DO)
      TOKEN_KIND_TO_STRING_CASE(TOK_FOR)
      TOKEN_KIND_TO_STRING_CASE(TOK_BREAK)
      TOKEN_KIND_TO_STRING_CASE(TOK_CONTINUE)
      TOKEN_KIND_TO_STRING_CASE(TOK_IN)
      TOKEN_KIND_TO_STRING_CASE(TOK_VAR)
      TOKEN_KIND_TO_STRING_CASE(TOK_CONST)
      TOKEN_KIND_TO_STRING_CASE(TOK_WITH)
      TOKEN_KIND_TO_STRING_CASE(TOK_RETURN)
      TOKEN_KIND_TO_STRING_CASE(TOK_NEW)
      TOKEN_KIND_TO_STRING_CASE(TOK_DELETE)
      TOKEN_KIND_TO_STRING_CASE(TOK_TRY)
      TOKEN_KIND_TO_STRING_CASE(TOK_CATCH)
      TOKEN_KIND_TO_STRING_CASE(TOK_FINALLY)
      TOKEN_KIND_TO_STRING_CASE(TOK_THROW)
      TOKEN_KIND_TO_STRING_CASE(TOK_INSTANCEOF)
      TOKEN_KIND_TO_STRING_CASE(TOK_DEBUGGER)
      TOKEN_KIND_TO_STRING_CASE(TOK_YIELD)
      TOKEN_KIND_TO_STRING_CASE(TOK_LET)
      TOKEN_KIND_TO_STRING_CASE(TOK_RESERVED)
      TOKEN_KIND_TO_STRING_CASE(TOK_STRICT_RESERVED)
      TOKEN_KIND_TO_STRING_CASE(TOK_STRICTEQ)
      TOKEN_KIND_TO_STRING_CASE(TOK_EQ)
      TOKEN_KIND_TO_STRING_CASE(TOK_STRICTNE)
      TOKEN_KIND_TO_STRING_CASE(TOK_NE)
      TOKEN_KIND_TO_STRING_CASE(TOK_TYPEOF)
      TOKEN_KIND_TO_STRING_CASE(TOK_VOID)
      TOKEN_KIND_TO_STRING_CASE(TOK_NOT)
      TOKEN_KIND_TO_STRING_CASE(TOK_BITNOT)
      TOKEN_KIND_TO_STRING_CASE(TOK_LT)
      TOKEN_KIND_TO_STRING_CASE(TOK_LE)
      TOKEN_KIND_TO_STRING_CASE(TOK_GT)
      TOKEN_KIND_TO_STRING_CASE(TOK_GE)
      TOKEN_KIND_TO_STRING_CASE(TOK_LSH)
      TOKEN_KIND_TO_STRING_CASE(TOK_RSH)
      TOKEN_KIND_TO_STRING_CASE(TOK_URSH)
      TOKEN_KIND_TO_STRING_CASE(TOK_ASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_ADDASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_SUBASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_BITORASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_BITXORASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_BITANDASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_LSHASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_RSHASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_URSHASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_MULASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_DIVASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_MODASSIGN)
      TOKEN_KIND_TO_STRING_CASE(TOK_EXPORT)
      TOKEN_KIND_TO_STRING_CASE(TOK_IMPORT)

      // TOK_LIMIT gets the same fallback string as an unknown value.
      case TOK_LIMIT:
        break;
    }

#undef TOKEN_KIND_TO_STRING_CASE

    return "<bad TokenKind>";
}
#endif

mercurial