Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | // Acorn is a tiny, fast JavaScript parser written in JavaScript. |
michael@0 | 2 | // |
michael@0 | 3 | // Acorn was written by Marijn Haverbeke and released under an MIT |
michael@0 | 4 | // license. The Unicode regexps (for identifiers and whitespace) were |
michael@0 | 5 | // taken from [Esprima](http://esprima.org) by Ariya Hidayat. |
michael@0 | 6 | // |
michael@0 | 7 | // Git repositories for Acorn are available at |
michael@0 | 8 | // |
michael@0 | 9 | // http://marijnhaverbeke.nl/git/acorn |
michael@0 | 10 | // https://github.com/marijnh/acorn.git |
michael@0 | 11 | // |
michael@0 | 12 | // Please use the [github bug tracker][ghbt] to report issues. |
michael@0 | 13 | // |
michael@0 | 14 | // [ghbt]: https://github.com/marijnh/acorn/issues |
michael@0 | 15 | // |
michael@0 | 16 | // This file defines the main parser interface. The library also comes |
michael@0 | 17 | // with an [error-tolerant parser][dammit] and an |
michael@0 | 18 | // [abstract syntax tree walker][walk], defined in other files. |
michael@0 | 19 | // |
michael@0 | 20 | // [dammit]: acorn_loose.js |
michael@0 | 21 | // [walk]: util/walk.js |
michael@0 | 22 | |
michael@0 | 23 | (function(root, mod) { |
michael@0 | 24 | if (typeof exports == "object" && typeof module == "object") return mod(exports); // CommonJS |
michael@0 | 25 | if (typeof define == "function" && define.amd) return define(["exports"], mod); // AMD |
michael@0 | 26 | mod(root.acorn || (root.acorn = {})); // Plain browser env |
michael@0 | 27 | })(this, function(exports) { |
michael@0 | 28 | "use strict"; |
michael@0 | 29 | |
michael@0 | 30 | exports.version = "0.4.1"; |
michael@0 | 31 | |
michael@0 | 32 | // The main exported interface (under `self.acorn` when in the |
michael@0 | 33 | // browser) is a `parse` function that takes a code string and |
michael@0 | 34 | // returns an abstract syntax tree as specified by [Mozilla parser |
michael@0 | 35 | // API][api], with the caveat that the SpiderMonkey-specific syntax |
michael@0 | 36 | // (`let`, `yield`, inline XML, etc) is not recognized. |
michael@0 | 37 | // |
michael@0 | 38 | // [api]: https://developer.mozilla.org/en-US/docs/SpiderMonkey/Parser_API |
michael@0 | 39 | |
michael@0 | 40 | var options, input, inputLen, sourceFile; |
michael@0 | 41 | |
michael@0 | 42 | exports.parse = function(inpt, opts) { |
michael@0 | 43 | input = String(inpt); inputLen = input.length; |
michael@0 | 44 | setOptions(opts); |
michael@0 | 45 | initTokenState(); |
michael@0 | 46 | return parseTopLevel(options.program); |
michael@0 | 47 | }; |
michael@0 | 48 | |
michael@0 | 49 | // A second optional argument can be given to further configure |
michael@0 | 50 | // the parser process. These options are recognized: |
michael@0 | 51 | |
michael@0 | 52 | var defaultOptions = exports.defaultOptions = { |
michael@0 | 53 | // `ecmaVersion` indicates the ECMAScript version to parse. Must |
michael@0 | 54 | // be either 3 or 5. This |
michael@0 | 55 | // influences support for strict mode, the set of reserved words, and |
michael@0 | 56 | // support for getters and setters. |
michael@0 | 57 | ecmaVersion: 5, |
michael@0 | 58 | // Turn on `strictSemicolons` to prevent the parser from doing |
michael@0 | 59 | // automatic semicolon insertion. |
michael@0 | 60 | strictSemicolons: false, |
michael@0 | 61 | // When `allowTrailingCommas` is false, the parser will not allow |
michael@0 | 62 | // trailing commas in array and object literals. |
michael@0 | 63 | allowTrailingCommas: true, |
michael@0 | 64 | // By default, reserved words are not enforced. Enable |
michael@0 | 65 | // `forbidReserved` to enforce them. |
michael@0 | 66 | forbidReserved: false, |
michael@0 | 67 | // When `locations` is on, `loc` properties holding objects with |
michael@0 | 68 | // `start` and `end` properties in `{line, column}` form (with |
michael@0 | 69 | // line being 1-based and column 0-based) will be attached to the |
michael@0 | 70 | // nodes. |
michael@0 | 71 | locations: false, |
michael@0 | 72 | // A function can be passed as `onComment` option, which will |
michael@0 | 73 | // cause Acorn to call that function with `(block, text, start, |
michael@0 | 74 | // end)` parameters whenever a comment is skipped. `block` is a |
michael@0 | 75 | // boolean indicating whether this is a block (`/* */`) comment, |
michael@0 | 76 | // `text` is the content of the comment, and `start` and `end` are |
michael@0 | 77 | // character offsets that denote the start and end of the comment. |
michael@0 | 78 | // When the `locations` option is on, two more parameters are |
michael@0 | 79 | // passed, the full `{line, column}` locations of the start and |
michael@0 | 80 | // end of the comments. Note that you are not allowed to call the |
michael@0 | 81 | // parser from the callback—that will corrupt its internal state. |
michael@0 | 82 | onComment: null, |
michael@0 | 83 | // Nodes have their start and end character offsets recorded in |
michael@0 | 84 | // `start` and `end` properties (directly on the node, rather than |
michael@0 | 85 | // the `loc` object, which holds line/column data). To also add a |
michael@0 | 86 | // [semi-standardized][range] `range` property holding a `[start, |
michael@0 | 87 | // end]` array with the same numbers, set the `ranges` option to |
michael@0 | 88 | // `true`. |
michael@0 | 89 | // |
michael@0 | 90 | // [range]: https://bugzilla.mozilla.org/show_bug.cgi?id=745678 |
michael@0 | 91 | ranges: false, |
michael@0 | 92 | // It is possible to parse multiple files into a single AST by |
michael@0 | 93 | // passing the tree produced by parsing the first file as |
michael@0 | 94 | // `program` option in subsequent parses. This will add the |
michael@0 | 95 | // toplevel forms of the parsed file to the `Program` (top) node |
michael@0 | 96 | // of an existing parse tree. |
michael@0 | 97 | program: null, |
michael@0 | 98 | // When `locations` is on, you can pass this to record the source |
michael@0 | 99 | // file in every node's `loc` object. |
michael@0 | 100 | sourceFile: null, |
michael@0 | 101 | // This value, if given, is stored in every node, whether |
michael@0 | 102 | // `locations` is on or off. |
michael@0 | 103 | directSourceFile: null |
michael@0 | 104 | }; |
michael@0 | 105 | |
// Install the options for the next parse/tokenize call.
//
// `opts` may be omitted. Every key of `defaultOptions` ends up in the
// shared `options` object: the caller's own value when `opts` has it
// as an own property, the default otherwise. The result is a fresh
// object, so the caller's `opts` is never written to (filling the
// defaults into it in place leaked parser defaults into caller state
// and threw a TypeError for frozen option objects in strict mode).
// Also caches `options.sourceFile` in `sourceFile`, normalising falsy
// values to `null`.
function setOptions(opts) {
  var given = opts || {};
  var merged = {};
  for (var opt in defaultOptions) {
    merged[opt] = Object.prototype.hasOwnProperty.call(given, opt)
      ? given[opt]
      : defaultOptions[opt];
  }
  options = merged;
  sourceFile = options.sourceFile || null;
}
michael@0 | 112 | |
michael@0 | 113 | // The `getLineInfo` function is mostly useful when the |
michael@0 | 114 | // `locations` option is off (for performance reasons) and you |
michael@0 | 115 | // want to find the line/column position for a given character |
michael@0 | 116 | // offset. `input` should be the code string that the offset refers |
michael@0 | 117 | // into. |
michael@0 | 118 | |
// Translate a character `offset` into `input` to a `{line, column}`
// pair (line counted from 1, column from 0) by walking the line
// breaks that start before the offset.
var getLineInfo = exports.getLineInfo = function(input, offset) {
  var lineNo = 1, lineStart = 0, found;
  while (true) {
    // `lineBreak` is a shared global regexp, so position it explicitly.
    lineBreak.lastIndex = lineStart;
    found = lineBreak.exec(input);
    if (!found || !(found.index < offset)) break;
    lineNo += 1;
    lineStart = found.index + found[0].length;
  }
  return {line: lineNo, column: offset - lineStart};
};
michael@0 | 130 | |
michael@0 | 131 | // Acorn is organized as a tokenizer and a recursive-descent parser. |
michael@0 | 132 | // The `tokenize` export provides an interface to the tokenizer. |
michael@0 | 133 | // Because the tokenizer is optimized for being efficiently used by |
michael@0 | 134 | // the Acorn parser itself, this interface is somewhat crude and not |
michael@0 | 135 | // very modular. Performing another parse or call to `tokenize` will |
michael@0 | 136 | // reset the internal state, and invalidate existing tokenizers. |
michael@0 | 137 | |
michael@0 | 138 | exports.tokenize = function(inpt, opts) { |
michael@0 | 139 | input = String(inpt); inputLen = input.length; |
michael@0 | 140 | setOptions(opts); |
michael@0 | 141 | initTokenState(); |
michael@0 | 142 | |
michael@0 | 143 | var t = {}; |
michael@0 | 144 | function getToken(forceRegexp) { |
michael@0 | 145 | lastEnd = tokEnd; |
michael@0 | 146 | readToken(forceRegexp); |
michael@0 | 147 | t.start = tokStart; t.end = tokEnd; |
michael@0 | 148 | t.startLoc = tokStartLoc; t.endLoc = tokEndLoc; |
michael@0 | 149 | t.type = tokType; t.value = tokVal; |
michael@0 | 150 | return t; |
michael@0 | 151 | } |
michael@0 | 152 | getToken.jumpTo = function(pos, reAllowed) { |
michael@0 | 153 | tokPos = pos; |
michael@0 | 154 | if (options.locations) { |
michael@0 | 155 | tokCurLine = 1; |
michael@0 | 156 | tokLineStart = lineBreak.lastIndex = 0; |
michael@0 | 157 | var match; |
michael@0 | 158 | while ((match = lineBreak.exec(input)) && match.index < pos) { |
michael@0 | 159 | ++tokCurLine; |
michael@0 | 160 | tokLineStart = match.index + match[0].length; |
michael@0 | 161 | } |
michael@0 | 162 | } |
michael@0 | 163 | tokRegexpAllowed = reAllowed; |
michael@0 | 164 | skipSpace(); |
michael@0 | 165 | }; |
michael@0 | 166 | return getToken; |
michael@0 | 167 | }; |
michael@0 | 168 | |
michael@0 | 169 | // State is kept in (closure-)global variables. We already saw the |
michael@0 | 170 | // `options`, `input`, and `inputLen` variables above. |
michael@0 | 171 | |
michael@0 | 172 | // The current position of the tokenizer in the input. |
michael@0 | 173 | |
michael@0 | 174 | var tokPos; |
michael@0 | 175 | |
michael@0 | 176 | // The start and end offsets of the current token. |
michael@0 | 177 | |
michael@0 | 178 | var tokStart, tokEnd; |
michael@0 | 179 | |
michael@0 | 180 | // When `options.locations` is true, these hold objects |
michael@0 | 181 | // containing the tokens start and end line/column pairs. |
michael@0 | 182 | |
michael@0 | 183 | var tokStartLoc, tokEndLoc; |
michael@0 | 184 | |
michael@0 | 185 | // The type and value of the current token. Token types are objects, |
michael@0 | 186 | // named by variables against which they can be compared, and |
michael@0 | 187 | // holding properties that describe them (indicating, for example, |
michael@0 | 188 | // the precedence of an infix operator, and the original name of a |
michael@0 | 189 | // keyword token). The kind of value that's held in `tokVal` depends |
michael@0 | 190 | // on the type of the token. For literals, it is the literal value, |
michael@0 | 191 | // for operators, the operator name, and so on. |
michael@0 | 192 | |
michael@0 | 193 | var tokType, tokVal; |
michael@0 | 194 | |
michael@0 | 195 | // Internal state for the tokenizer. To distinguish between division |
michael@0 | 196 | // operators and regular expressions, it remembers whether the last |
michael@0 | 197 | // token was one that is allowed to be followed by an expression. |
michael@0 | 198 | // (If it is, a slash is probably a regexp, if it isn't it's a |
michael@0 | 199 | // division operator. See the `parseStatement` function for a |
michael@0 | 200 | // caveat.) |
michael@0 | 201 | |
michael@0 | 202 | var tokRegexpAllowed; |
michael@0 | 203 | |
michael@0 | 204 | // When `options.locations` is true, these are used to keep |
michael@0 | 205 | // track of the current line, and know when a new line has been |
michael@0 | 206 | // entered. |
michael@0 | 207 | |
michael@0 | 208 | var tokCurLine, tokLineStart; |
michael@0 | 209 | |
michael@0 | 210 | // These store the position of the previous token, which is useful |
michael@0 | 211 | // when finishing a node and assigning its `end` position. |
michael@0 | 212 | |
michael@0 | 213 | var lastStart, lastEnd, lastEndLoc; |
michael@0 | 214 | |
michael@0 | 215 | // This is the parser's state. `inFunction` is used to reject |
michael@0 | 216 | // `return` statements outside of functions, `labels` to verify that |
michael@0 | 217 | // `break` and `continue` have somewhere to jump to, and `strict` |
michael@0 | 218 | // indicates whether strict mode is on. |
michael@0 | 219 | |
michael@0 | 220 | var inFunction, labels, strict; |
michael@0 | 221 | |
michael@0 | 222 | // This function is used to raise exceptions on parse errors. It |
michael@0 | 223 | // takes an offset integer (into the current `input`) to indicate |
michael@0 | 224 | // the location of the error, attaches the position to the end |
michael@0 | 225 | // of the error message, and then raises a `SyntaxError` with that |
michael@0 | 226 | // message. |
michael@0 | 227 | |
// Throw a `SyntaxError` for the offset `pos` in the current `input`.
// The line/column found by `getLineInfo` is appended to `message` as
// " (line:column)", and the error carries `pos`, `loc` and `raisedAt`
// (the tokenizer position at the time of the throw).
function raise(pos, message) {
  var where = getLineInfo(input, pos);
  var error = new SyntaxError(message + " (" + where.line + ":" + where.column + ")");
  error.pos = pos;
  error.loc = where;
  error.raisedAt = tokPos;
  throw error;
}
michael@0 | 235 | |
michael@0 | 236 | // Reused empty array added for node fields that are always empty. |
michael@0 | 237 | |
michael@0 | 238 | var empty = []; |
michael@0 | 239 | |
michael@0 | 240 | // ## Token types |
michael@0 | 241 | |
michael@0 | 242 | // The assignment of fine-grained, information-carrying type objects |
michael@0 | 243 | // allows the tokenizer to store the information it has about a |
michael@0 | 244 | // token in a way that is very cheap for the parser to look up. |
michael@0 | 245 | |
michael@0 | 246 | // All token type variables start with an underscore, to make them |
michael@0 | 247 | // easy to recognize. |
michael@0 | 248 | |
michael@0 | 249 | // These are the general types. The `type` property is only used to |
michael@0 | 250 | // make them recognizable when debugging. |
michael@0 | 251 | |
michael@0 | 252 | var _num = {type: "num"}, _regexp = {type: "regexp"}, _string = {type: "string"}; |
michael@0 | 253 | var _name = {type: "name"}, _eof = {type: "eof"}; |
michael@0 | 254 | |
michael@0 | 255 | // Keyword tokens. The `keyword` property (also used in keyword-like |
michael@0 | 256 | // operators) indicates that the token originated from an |
michael@0 | 257 | // identifier-like word, which is used when parsing property names. |
michael@0 | 258 | // |
michael@0 | 259 | // The `beforeExpr` property is used to disambiguate between regular |
michael@0 | 260 | // expressions and divisions. It is set on all token types that can |
michael@0 | 261 | // be followed by an expression (thus, a slash after them would be a |
michael@0 | 262 | // regular expression). |
michael@0 | 263 | // |
michael@0 | 264 | // `isLoop` marks a keyword as starting a loop, which is important |
michael@0 | 265 | // to know when parsing a label, in order to allow or disallow |
michael@0 | 266 | // continue jumps to that label. |
michael@0 | 267 | |
michael@0 | 268 | var _break = {keyword: "break"}, _case = {keyword: "case", beforeExpr: true}, _catch = {keyword: "catch"}; |
michael@0 | 269 | var _continue = {keyword: "continue"}, _debugger = {keyword: "debugger"}, _default = {keyword: "default"}; |
michael@0 | 270 | var _do = {keyword: "do", isLoop: true}, _else = {keyword: "else", beforeExpr: true}; |
michael@0 | 271 | var _finally = {keyword: "finally"}, _for = {keyword: "for", isLoop: true}, _function = {keyword: "function"}; |
michael@0 | 272 | var _if = {keyword: "if"}, _return = {keyword: "return", beforeExpr: true}, _switch = {keyword: "switch"}; |
michael@0 | 273 | var _throw = {keyword: "throw", beforeExpr: true}, _try = {keyword: "try"}, _var = {keyword: "var"}; |
michael@0 | 274 | var _while = {keyword: "while", isLoop: true}, _with = {keyword: "with"}, _new = {keyword: "new", beforeExpr: true}; |
michael@0 | 275 | var _this = {keyword: "this"}; |
michael@0 | 276 | |
michael@0 | 277 | // The keywords that denote values. |
michael@0 | 278 | |
michael@0 | 279 | var _null = {keyword: "null", atomValue: null}, _true = {keyword: "true", atomValue: true}; |
michael@0 | 280 | var _false = {keyword: "false", atomValue: false}; |
michael@0 | 281 | |
michael@0 | 282 | // Some keywords are treated as regular operators. `in` sometimes |
michael@0 | 283 | // (when parsing `for`) needs to be tested against specifically, so |
michael@0 | 284 | // we assign a variable name to it for quick comparing. |
michael@0 | 285 | |
michael@0 | 286 | var _in = {keyword: "in", binop: 7, beforeExpr: true}; |
michael@0 | 287 | |
michael@0 | 288 | // Map keyword names to token types. |
michael@0 | 289 | |
michael@0 | 290 | var keywordTypes = {"break": _break, "case": _case, "catch": _catch, |
michael@0 | 291 | "continue": _continue, "debugger": _debugger, "default": _default, |
michael@0 | 292 | "do": _do, "else": _else, "finally": _finally, "for": _for, |
michael@0 | 293 | "function": _function, "if": _if, "return": _return, "switch": _switch, |
michael@0 | 294 | "throw": _throw, "try": _try, "var": _var, "while": _while, "with": _with, |
michael@0 | 295 | "null": _null, "true": _true, "false": _false, "new": _new, "in": _in, |
michael@0 | 296 | "instanceof": {keyword: "instanceof", binop: 7, beforeExpr: true}, "this": _this, |
michael@0 | 297 | "typeof": {keyword: "typeof", prefix: true, beforeExpr: true}, |
michael@0 | 298 | "void": {keyword: "void", prefix: true, beforeExpr: true}, |
michael@0 | 299 | "delete": {keyword: "delete", prefix: true, beforeExpr: true}}; |
michael@0 | 300 | |
michael@0 | 301 | // Punctuation token types. Again, the `type` property is purely for debugging. |
michael@0 | 302 | |
michael@0 | 303 | var _bracketL = {type: "[", beforeExpr: true}, _bracketR = {type: "]"}, _braceL = {type: "{", beforeExpr: true}; |
michael@0 | 304 | var _braceR = {type: "}"}, _parenL = {type: "(", beforeExpr: true}, _parenR = {type: ")"}; |
michael@0 | 305 | var _comma = {type: ",", beforeExpr: true}, _semi = {type: ";", beforeExpr: true}; |
michael@0 | 306 | var _colon = {type: ":", beforeExpr: true}, _dot = {type: "."}, _question = {type: "?", beforeExpr: true}; |
michael@0 | 307 | |
michael@0 | 308 | // Operators. These carry several kinds of properties to help the |
michael@0 | 309 | // parser use them properly (the presence of these properties is |
michael@0 | 310 | // what categorizes them as operators). |
michael@0 | 311 | // |
michael@0 | 312 | // `binop`, when present, specifies that this operator is a binary |
michael@0 | 313 | // operator, and will refer to its precedence. |
michael@0 | 314 | // |
michael@0 | 315 | // `prefix` and `postfix` mark the operator as a prefix or postfix |
michael@0 | 316 | // unary operator. `isUpdate` specifies that the node produced by |
michael@0 | 317 | // the operator should be of type UpdateExpression rather than |
michael@0 | 318 | // simply UnaryExpression (`++` and `--`). |
michael@0 | 319 | // |
michael@0 | 320 | // `isAssign` marks all of `=`, `+=`, `-=` etcetera, which act as |
michael@0 | 321 | // binary operators with a very low precedence, that should result |
michael@0 | 322 | // in AssignmentExpression nodes. |
michael@0 | 323 | |
michael@0 | 324 | var _slash = {binop: 10, beforeExpr: true}, _eq = {isAssign: true, beforeExpr: true}; |
michael@0 | 325 | var _assign = {isAssign: true, beforeExpr: true}; |
michael@0 | 326 | var _incDec = {postfix: true, prefix: true, isUpdate: true}, _prefix = {prefix: true, beforeExpr: true}; |
michael@0 | 327 | var _logicalOR = {binop: 1, beforeExpr: true}; |
michael@0 | 328 | var _logicalAND = {binop: 2, beforeExpr: true}; |
michael@0 | 329 | var _bitwiseOR = {binop: 3, beforeExpr: true}; |
michael@0 | 330 | var _bitwiseXOR = {binop: 4, beforeExpr: true}; |
michael@0 | 331 | var _bitwiseAND = {binop: 5, beforeExpr: true}; |
michael@0 | 332 | var _equality = {binop: 6, beforeExpr: true}; |
michael@0 | 333 | var _relational = {binop: 7, beforeExpr: true}; |
michael@0 | 334 | var _bitShift = {binop: 8, beforeExpr: true}; |
michael@0 | 335 | var _plusMin = {binop: 9, prefix: true, beforeExpr: true}; |
michael@0 | 336 | var _multiplyModulo = {binop: 10, beforeExpr: true}; |
michael@0 | 337 | |
michael@0 | 338 | // Provide access to the token types for external users of the |
michael@0 | 339 | // tokenizer. |
michael@0 | 340 | |
michael@0 | 341 | exports.tokTypes = {bracketL: _bracketL, bracketR: _bracketR, braceL: _braceL, braceR: _braceR, |
michael@0 | 342 | parenL: _parenL, parenR: _parenR, comma: _comma, semi: _semi, colon: _colon, |
michael@0 | 343 | dot: _dot, question: _question, slash: _slash, eq: _eq, name: _name, eof: _eof, |
michael@0 | 344 | num: _num, regexp: _regexp, string: _string}; |
michael@0 | 345 | for (var kw in keywordTypes) exports.tokTypes["_" + kw] = keywordTypes[kw]; |
michael@0 | 346 | |
// Build a membership predicate from a space-separated string of
// words: the returned function takes a string and returns `true`
// when it is one of the words and `false` otherwise.
//
// This used to generate a `switch`-based function with
// `new Function` (a trick taken from Esprima). That requires `eval`
// privileges, so it fails under a Content Security Policy without
// `unsafe-eval`, and the generated code returned `undefined` rather
// than `false` for strings whose length matched none of the words.
// A prototype-less lookup table avoids both problems.

function makePredicate(words) {
  // No prototype, so names such as "constructor" or "__proto__" are
  // never reported as members by accident.
  var table = Object.create(null);
  var list = words.split(" ");
  for (var i = 0; i < list.length; ++i) table[list[i]] = true;

  // Only real strings can be members; the generated code compared
  // with `===`, so values that merely stringify to a word never matched.
  return function(str) {
    return typeof str === "string" && table[str] === true;
  };
}
michael@0 | 394 | |
michael@0 | 395 | // The ECMAScript 3 reserved word list. |
michael@0 | 396 | |
michael@0 | 397 | var isReservedWord3 = makePredicate("abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized throws transient volatile"); |
michael@0 | 398 | |
michael@0 | 399 | // ECMAScript 5 reserved words. |
michael@0 | 400 | |
michael@0 | 401 | var isReservedWord5 = makePredicate("class enum extends super const export import"); |
michael@0 | 402 | |
michael@0 | 403 | // The additional reserved words in strict mode. |
michael@0 | 404 | |
michael@0 | 405 | var isStrictReservedWord = makePredicate("implements interface let package private protected public static yield"); |
michael@0 | 406 | |
michael@0 | 407 | // The forbidden variable names in strict mode. |
michael@0 | 408 | |
michael@0 | 409 | var isStrictBadIdWord = makePredicate("eval arguments"); |
michael@0 | 410 | |
michael@0 | 411 | // And the keywords. |
michael@0 | 412 | |
michael@0 | 413 | var isKeyword = makePredicate("break case catch continue debugger default do else finally for function if return switch throw try var while with null true false instanceof typeof void delete new in this"); |
michael@0 | 414 | |
michael@0 | 415 | // ## Character categories |
michael@0 | 416 | |
michael@0 | 417 | // Big ugly regular expressions that match characters in the |
michael@0 | 418 | // whitespace, identifier, and identifier-start categories. These |
michael@0 | 419 | // are only applied when a character is found to actually have a |
michael@0 | 420 | // code point above 128. |
michael@0 | 421 | |
michael@0 | 422 | var nonASCIIwhitespace = /[\u1680\u180e\u2000-\u200a\u202f\u205f\u3000\ufeff]/; |
michael@0 | 423 | var nonASCIIidentifierStartChars = "\xaa\xb5\xba\xc0-\xd6\xd8-\xf6\xf8-\u02c1\u02c6-\u02d1\u02e0-\u02e4\u02ec\u02ee\u0370-\u0374\u0376\u0377\u037a-\u037d\u0386\u0388-\u038a\u038c\u038e-\u03a1\u03a3-\u03f5\u03f7-\u0481\u048a-\u0527\u0531-\u0556\u0559\u0561-\u0587\u05d0-\u05ea\u05f0-\u05f2\u0620-\u064a\u066e\u066f\u0671-\u06d3\u06d5\u06e5\u06e6\u06ee\u06ef\u06fa-\u06fc\u06ff\u0710\u0712-\u072f\u074d-\u07a5\u07b1\u07ca-\u07ea\u07f4\u07f5\u07fa\u0800-\u0815\u081a\u0824\u0828\u0840-\u0858\u08a0\u08a2-\u08ac\u0904-\u0939\u093d\u0950\u0958-\u0961\u0971-\u0977\u0979-\u097f\u0985-\u098c\u098f\u0990\u0993-\u09a8\u09aa-\u09b0\u09b2\u09b6-\u09b9\u09bd\u09ce\u09dc\u09dd\u09df-\u09e1\u09f0\u09f1\u0a05-\u0a0a\u0a0f\u0a10\u0a13-\u0a28\u0a2a-\u0a30\u0a32\u0a33\u0a35\u0a36\u0a38\u0a39\u0a59-\u0a5c\u0a5e\u0a72-\u0a74\u0a85-\u0a8d\u0a8f-\u0a91\u0a93-\u0aa8\u0aaa-\u0ab0\u0ab2\u0ab3\u0ab5-\u0ab9\u0abd\u0ad0\u0ae0\u0ae1\u0b05-\u0b0c\u0b0f\u0b10\u0b13-\u0b28\u0b2a-\u0b30\u0b32\u0b33\u0b35-\u0b39\u0b3d\u0b5c\u0b5d\u0b5f-\u0b61\u0b71\u0b83\u0b85-\u0b8a\u0b8e-\u0b90\u0b92-\u0b95\u0b99\u0b9a\u0b9c\u0b9e\u0b9f\u0ba3\u0ba4\u0ba8-\u0baa\u0bae-\u0bb9\u0bd0\u0c05-\u0c0c\u0c0e-\u0c10\u0c12-\u0c28\u0c2a-\u0c33\u0c35-\u0c39\u0c3d\u0c58\u0c59\u0c60\u0c61\u0c85-\u0c8c\u0c8e-\u0c90\u0c92-\u0ca8\u0caa-\u0cb3\u0cb5-\u0cb9\u0cbd\u0cde\u0ce0\u0ce1\u0cf1\u0cf2\u0d05-\u0d0c\u0d0e-\u0d10\u0d12-\u0d3a\u0d3d\u0d4e\u0d60\u0d61\u0d7a-\u0d7f\u0d85-\u0d96\u0d9a-\u0db1\u0db3-\u0dbb\u0dbd\u0dc0-\u0dc6\u0e01-\u0e30\u0e32\u0e33\u0e40-\u0e46\u0e81\u0e82\u0e84\u0e87\u0e88\u0e8a\u0e8d\u0e94-\u0e97\u0e99-\u0e9f\u0ea1-\u0ea3\u0ea5\u0ea7\u0eaa\u0eab\u0ead-\u0eb0\u0eb2\u0eb3\u0ebd\u0ec0-\u0ec4\u0ec6\u0edc-\u0edf\u0f00\u0f40-\u0f47\u0f49-\u0f6c\u0f88-\u0f8c\u1000-\u102a\u103f\u1050-\u1055\u105a-\u105d\u1061\u1065\u1066\u106e-\u1070\u1075-\u1081\u108e\u10a0-\u10c5\u10c7\u10cd\u10d0-\u10fa\u10fc-\u1248\u124a-\u124d\u1250-\u1256\u1258\u125a-\u125d\u1260-\u1288\u128a-\u128d\u1290-\u12b0\u12b2-\u12b5\u12b8-\u12be\u1
2c0\u12c2-\u12c5\u12c8-\u12d6\u12d8-\u1310\u1312-\u1315\u1318-\u135a\u1380-\u138f\u13a0-\u13f4\u1401-\u166c\u166f-\u167f\u1681-\u169a\u16a0-\u16ea\u16ee-\u16f0\u1700-\u170c\u170e-\u1711\u1720-\u1731\u1740-\u1751\u1760-\u176c\u176e-\u1770\u1780-\u17b3\u17d7\u17dc\u1820-\u1877\u1880-\u18a8\u18aa\u18b0-\u18f5\u1900-\u191c\u1950-\u196d\u1970-\u1974\u1980-\u19ab\u19c1-\u19c7\u1a00-\u1a16\u1a20-\u1a54\u1aa7\u1b05-\u1b33\u1b45-\u1b4b\u1b83-\u1ba0\u1bae\u1baf\u1bba-\u1be5\u1c00-\u1c23\u1c4d-\u1c4f\u1c5a-\u1c7d\u1ce9-\u1cec\u1cee-\u1cf1\u1cf5\u1cf6\u1d00-\u1dbf\u1e00-\u1f15\u1f18-\u1f1d\u1f20-\u1f45\u1f48-\u1f4d\u1f50-\u1f57\u1f59\u1f5b\u1f5d\u1f5f-\u1f7d\u1f80-\u1fb4\u1fb6-\u1fbc\u1fbe\u1fc2-\u1fc4\u1fc6-\u1fcc\u1fd0-\u1fd3\u1fd6-\u1fdb\u1fe0-\u1fec\u1ff2-\u1ff4\u1ff6-\u1ffc\u2071\u207f\u2090-\u209c\u2102\u2107\u210a-\u2113\u2115\u2119-\u211d\u2124\u2126\u2128\u212a-\u212d\u212f-\u2139\u213c-\u213f\u2145-\u2149\u214e\u2160-\u2188\u2c00-\u2c2e\u2c30-\u2c5e\u2c60-\u2ce4\u2ceb-\u2cee\u2cf2\u2cf3\u2d00-\u2d25\u2d27\u2d2d\u2d30-\u2d67\u2d6f\u2d80-\u2d96\u2da0-\u2da6\u2da8-\u2dae\u2db0-\u2db6\u2db8-\u2dbe\u2dc0-\u2dc6\u2dc8-\u2dce\u2dd0-\u2dd6\u2dd8-\u2dde\u2e2f\u3005-\u3007\u3021-\u3029\u3031-\u3035\u3038-\u303c\u3041-\u3096\u309d-\u309f\u30a1-\u30fa\u30fc-\u30ff\u3105-\u312d\u3131-\u318e\u31a0-\u31ba\u31f0-\u31ff\u3400-\u4db5\u4e00-\u9fcc\ua000-\ua48c\ua4d0-\ua4fd\ua500-\ua60c\ua610-\ua61f\ua62a\ua62b\ua640-\ua66e\ua67f-\ua697\ua6a0-\ua6ef\ua717-\ua71f\ua722-\ua788\ua78b-\ua78e\ua790-\ua793\ua7a0-\ua7aa\ua7f8-\ua801\ua803-\ua805\ua807-\ua80a\ua80c-\ua822\ua840-\ua873\ua882-\ua8b3\ua8f2-\ua8f7\ua8fb\ua90a-\ua925\ua930-\ua946\ua960-\ua97c\ua984-\ua9b2\ua9cf\uaa00-\uaa28\uaa40-\uaa42\uaa44-\uaa4b\uaa60-\uaa76\uaa7a\uaa80-\uaaaf\uaab1\uaab5\uaab6\uaab9-\uaabd\uaac0\uaac2\uaadb-\uaadd\uaae0-\uaaea\uaaf2-\uaaf4\uab01-\uab06\uab09-\uab0e\uab11-\uab16\uab20-\uab26\uab28-\uab2e\uabc0-\uabe2\uac00-\ud7a3\ud7b0-\ud7c6\ud7cb-\ud7fb\uf900-\ufa6d\ufa70-\ufad9\ufb00-\ufb06\ufb13-\ufb17\ufb1d\
ufb1f-\ufb28\ufb2a-\ufb36\ufb38-\ufb3c\ufb3e\ufb40\ufb41\ufb43\ufb44\ufb46-\ufbb1\ufbd3-\ufd3d\ufd50-\ufd8f\ufd92-\ufdc7\ufdf0-\ufdfb\ufe70-\ufe74\ufe76-\ufefc\uff21-\uff3a\uff41-\uff5a\uff66-\uffbe\uffc2-\uffc7\uffca-\uffcf\uffd2-\uffd7\uffda-\uffdc"; |
michael@0 | 424 | var nonASCIIidentifierChars = "\u0300-\u036f\u0483-\u0487\u0591-\u05bd\u05bf\u05c1\u05c2\u05c4\u05c5\u05c7\u0610-\u061a\u0620-\u0649\u0672-\u06d3\u06e7-\u06e8\u06fb-\u06fc\u0730-\u074a\u0800-\u0814\u081b-\u0823\u0825-\u0827\u0829-\u082d\u0840-\u0857\u08e4-\u08fe\u0900-\u0903\u093a-\u093c\u093e-\u094f\u0951-\u0957\u0962-\u0963\u0966-\u096f\u0981-\u0983\u09bc\u09be-\u09c4\u09c7\u09c8\u09d7\u09df-\u09e0\u0a01-\u0a03\u0a3c\u0a3e-\u0a42\u0a47\u0a48\u0a4b-\u0a4d\u0a51\u0a66-\u0a71\u0a75\u0a81-\u0a83\u0abc\u0abe-\u0ac5\u0ac7-\u0ac9\u0acb-\u0acd\u0ae2-\u0ae3\u0ae6-\u0aef\u0b01-\u0b03\u0b3c\u0b3e-\u0b44\u0b47\u0b48\u0b4b-\u0b4d\u0b56\u0b57\u0b5f-\u0b60\u0b66-\u0b6f\u0b82\u0bbe-\u0bc2\u0bc6-\u0bc8\u0bca-\u0bcd\u0bd7\u0be6-\u0bef\u0c01-\u0c03\u0c46-\u0c48\u0c4a-\u0c4d\u0c55\u0c56\u0c62-\u0c63\u0c66-\u0c6f\u0c82\u0c83\u0cbc\u0cbe-\u0cc4\u0cc6-\u0cc8\u0cca-\u0ccd\u0cd5\u0cd6\u0ce2-\u0ce3\u0ce6-\u0cef\u0d02\u0d03\u0d46-\u0d48\u0d57\u0d62-\u0d63\u0d66-\u0d6f\u0d82\u0d83\u0dca\u0dcf-\u0dd4\u0dd6\u0dd8-\u0ddf\u0df2\u0df3\u0e34-\u0e3a\u0e40-\u0e45\u0e50-\u0e59\u0eb4-\u0eb9\u0ec8-\u0ecd\u0ed0-\u0ed9\u0f18\u0f19\u0f20-\u0f29\u0f35\u0f37\u0f39\u0f41-\u0f47\u0f71-\u0f84\u0f86-\u0f87\u0f8d-\u0f97\u0f99-\u0fbc\u0fc6\u1000-\u1029\u1040-\u1049\u1067-\u106d\u1071-\u1074\u1082-\u108d\u108f-\u109d\u135d-\u135f\u170e-\u1710\u1720-\u1730\u1740-\u1750\u1772\u1773\u1780-\u17b2\u17dd\u17e0-\u17e9\u180b-\u180d\u1810-\u1819\u1920-\u192b\u1930-\u193b\u1951-\u196d\u19b0-\u19c0\u19c8-\u19c9\u19d0-\u19d9\u1a00-\u1a15\u1a20-\u1a53\u1a60-\u1a7c\u1a7f-\u1a89\u1a90-\u1a99\u1b46-\u1b4b\u1b50-\u1b59\u1b6b-\u1b73\u1bb0-\u1bb9\u1be6-\u1bf3\u1c00-\u1c22\u1c40-\u1c49\u1c5b-\u1c7d\u1cd0-\u1cd2\u1d00-\u1dbe\u1e01-\u1f15\u200c\u200d\u203f\u2040\u2054\u20d0-\u20dc\u20e1\u20e5-\u20f0\u2d81-\u2d96\u2de0-\u2dff\u3021-\u3028\u3099\u309a\ua640-\ua66d\ua674-\ua67d\ua69f\ua6f0-\ua6f1\ua7f8-\ua800\ua806\ua80b\ua823-\ua827\ua880-\ua881\ua8b4-\ua8c4\ua8d0-\ua8d9\ua8f3-\ua8f7\ua900-\ua909\ua926-\ua92d\ua930-\ua9
45\ua980-\ua983\ua9b3-\ua9c0\uaa00-\uaa27\uaa40-\uaa41\uaa4c-\uaa4d\uaa50-\uaa59\uaa7b\uaae0-\uaae9\uaaf2-\uaaf3\uabc0-\uabe1\uabec\uabed\uabf0-\uabf9\ufb20-\ufb28\ufe00-\ufe0f\ufe20-\ufe26\ufe33\ufe34\ufe4d-\ufe4f\uff10-\uff19\uff3f"; |
michael@0 | 425 | var nonASCIIidentifierStart = new RegExp("[" + nonASCIIidentifierStartChars + "]"); |
michael@0 | 426 | var nonASCIIidentifier = new RegExp("[" + nonASCIIidentifierStartChars + nonASCIIidentifierChars + "]"); |
michael@0 | 427 | |
michael@0 | 428 | // Whether a single character denotes a newline. |
michael@0 | 429 | |
michael@0 | 430 | var newline = /[\n\r\u2028\u2029]/; |
michael@0 | 431 | |
michael@0 | 432 | // Matches a whole line break (where CRLF is considered a single |
michael@0 | 433 | // line break). Used to count lines. The regexp is global (`g`), so `exec` resumes from `lastIndex`; skipBlockComment sets `lastIndex` before scanning. |
michael@0 | 434 | |
michael@0 | 435 | var lineBreak = /\r\n|[\n\r\u2028\u2029]/g; |
michael@0 | 436 | |
michael@0 | 437 | // Test whether a given character code starts an identifier. |
michael@0 | 438 | |
// Returns true when the character code `code` may begin an identifier:
// '$', '_', an ASCII letter, or a character >= U+00AA accepted by
// `nonASCIIidentifierStart`. Also exported as `exports.isIdentifierStart`.
var isIdentifierStart = exports.isIdentifierStart = function(code) {
  if (code === 36 || code === 95) return true;   // '$' or '_'
  if (code >= 65 && code < 91) return true;      // 'A'-'Z'
  if (code >= 97 && code < 123) return true;     // 'a'-'z'
  // Everything else below 0xaa is rejected without consulting the regexp.
  return code >= 0xaa && nonASCIIidentifierStart.test(String.fromCharCode(code));
};
michael@0 | 446 | |
michael@0 | 447 | // Test whether a given character is part of an identifier. |
michael@0 | 448 | |
// Returns true when the character code `code` may appear inside an
// identifier: '$', '_', an ASCII digit or letter, or a character >= U+00AA
// accepted by `nonASCIIidentifier`. Also exported as
// `exports.isIdentifierChar`.
var isIdentifierChar = exports.isIdentifierChar = function(code) {
  if (code === 36 || code === 95) return true;   // '$' or '_'
  if (code >= 48 && code < 58) return true;      // '0'-'9'
  if (code >= 65 && code < 91) return true;      // 'A'-'Z'
  if (code >= 97 && code < 123) return true;     // 'a'-'z'
  // Everything else below 0xaa is rejected without consulting the regexp.
  return code >= 0xaa && nonASCIIidentifier.test(String.fromCharCode(code));
};
michael@0 | 458 | |
michael@0 | 459 | // ## Tokenizer |
michael@0 | 460 | |
michael@0 | 461 | // These are used when `options.locations` is on, for the |
michael@0 | 462 | // `tokStartLoc` and `tokEndLoc` properties. |
michael@0 | 463 | |
// Constructor: snapshots the tokenizer's current position as a
// `{line, column}` pair, where the column is the offset of `tokPos` from
// the start of the current line.
function line_loc_t() {
  var columnOffset = tokPos - tokLineStart;
  this.line = tokCurLine;
  this.column = columnOffset;
}
michael@0 | 468 | |
michael@0 | 469 | // Reset the token state. Used at the start of a parse. |
michael@0 | 470 | |
// Puts the tokenizer back at offset 0 on line 1, allows a regexp as the
// first token, and skips any leading whitespace/comments.
function initTokenState() {
  tokLineStart = 0;
  tokPos = 0;
  tokCurLine = 1;
  tokRegexpAllowed = true;
  skipSpace();
}
michael@0 | 477 | |
michael@0 | 478 | // Called at the end of every token. Sets `tokEnd`, `tokVal`, and |
michael@0 | 479 | // `tokRegexpAllowed`, and skips the space after the token, so that |
michael@0 | 480 | // the next one's `tokStart` will point at the right position. |
michael@0 | 481 | |
// Records the token that ends at `tokPos`.
//   type - token type object; its `beforeExpr` property decides whether a
//          regexp may follow.
//   val  - token value (may be omitted).
// The end position is captured before the trailing whitespace is skipped.
function finishToken(type, val) {
  tokEnd = tokPos;
  if (options.locations) {
    tokEndLoc = new line_loc_t();
  }
  tokType = type;
  // Advance to the start of the next token.
  skipSpace();
  tokVal = val;
  tokRegexpAllowed = type.beforeExpr;
}
michael@0 | 490 | |
// Skips a `/* ... */` comment that starts at `tokPos`. Raises
// "Unterminated comment" when no closing `*/` exists. With
// `options.locations` on, line breaks inside the comment update
// `tokCurLine`/`tokLineStart`. With `options.onComment` set, the callback
// receives (true, text, start, end, startLoc, endLoc).
function skipBlockComment() {
  var startLoc = options.onComment && options.locations && new line_loc_t();
  var commentStart = tokPos;
  tokPos += 2; // step over "/*"
  var closeAt = input.indexOf("*/", tokPos);
  if (closeAt === -1) raise(tokPos - 2, "Unterminated comment");
  tokPos = closeAt + 2;
  if (options.locations) {
    // `lineBreak` is a global regexp, so position it explicitly.
    lineBreak.lastIndex = commentStart;
    for (var hit = lineBreak.exec(input); hit && hit.index < tokPos; hit = lineBreak.exec(input)) {
      ++tokCurLine;
      tokLineStart = hit.index + hit[0].length;
    }
  }
  if (options.onComment) {
    var endLoc = options.locations && new line_loc_t();
    options.onComment(true, input.slice(commentStart + 2, closeAt), commentStart, tokPos,
                      startLoc, endLoc);
  }
}
michael@0 | 508 | |
// Skips a line comment that starts at `tokPos` and runs up to (but not
// including) the next line terminator or the end of input.
//
//   startSkip - optional length of the comment opener to step over before
//               scanning. Defaults to 2 (for `//`), which is what every
//               call without an argument gets. Openers of another length
//               (`-->` is 3, `<!--` is 4) can pass their own value instead
//               of pre-advancing `tokPos`, which would make the fixed
//               two-character skip overshoot.
//
// With `options.onComment` set, the callback receives
// (false, text, start, end, startLoc, endLoc), where `text` excludes the
// opener and `start` is the offset of the opener.
function skipLineComment(startSkip) {
  if (startSkip == null) startSkip = 2;
  var start = tokPos;
  var startLoc = options.onComment && options.locations && new line_loc_t;
  var ch = input.charCodeAt(tokPos += startSkip);
  // Stop at LF, CR, U+2028 or U+2029.
  while (tokPos < inputLen && ch !== 10 && ch !== 13 && ch !== 8232 && ch !== 8233) {
    ++tokPos;
    ch = input.charCodeAt(tokPos);
  }
  if (options.onComment)
    options.onComment(false, input.slice(start + startSkip, tokPos), start, tokPos,
                      startLoc, options.locations && new line_loc_t);
}
michael@0 | 521 | |
michael@0 | 522 | // Called at the start of the parse and after every token. Skips |
michael@0 | 523 | // whitespace and comments, and (when `options.locations` is on) keeps `tokCurLine` and `tokLineStart` up to date. |
michael@0 | 524 | |
// Advances `tokPos` past whitespace, line terminators and comments until
// it reaches a character that can start a token (or the end of input).
// When `options.locations` is on, every line terminator bumps `tokCurLine`
// and resets `tokLineStart`; CRLF counts as one line break.
function skipSpace() {
  while (tokPos < inputLen) {
    var code = input.charCodeAt(tokPos);

    if (code === 47) { // '/': only a comment opener is skipped
      var following = input.charCodeAt(tokPos + 1);
      if (following === 42) skipBlockComment();      // "/*"
      else if (following === 47) skipLineComment();  // "//"
      else break;
      continue;
    }

    if (code === 13 || code === 10 || code === 8232 || code === 8233) {
      ++tokPos;
      // Treat "\r\n" as a single line break.
      if (code === 13 && input.charCodeAt(tokPos) === 10) ++tokPos;
      if (options.locations) {
        ++tokCurLine;
        tokLineStart = tokPos;
      }
      continue;
    }

    // Space, the control characters 9-13, '\xa0', or a character >= 5760
    // that `nonASCIIwhitespace` accepts.
    var isBlank = code === 32 ||
        (code > 8 && code < 14) ||
        code === 160 ||
        (code >= 5760 && nonASCIIwhitespace.test(String.fromCharCode(code)));
    if (!isBlank) break;
    ++tokPos;
  }
}
michael@0 | 564 | |
michael@0 | 565 | // ### Token reading |
michael@0 | 566 | |
michael@0 | 567 | // This is the function that is called to fetch the next token. It |
michael@0 | 568 | // is somewhat obscure, because it works in character codes rather |
michael@0 | 569 | // than characters, and because operator parsing has been inlined |
michael@0 | 570 | // into it. |
michael@0 | 571 | // |
michael@0 | 572 | // All in the name of speed. |
michael@0 | 573 | // |
michael@0 | 574 | // The `forceRegexp` parameter is used in the one case where the |
michael@0 | 575 | // `tokRegexpAllowed` trick does not work. See `parseStatement`. |
michael@0 | 576 | |
// Handles '.': a dot followed by a digit starts a number literal, any
// other dot is the `_dot` punctuation token.
function readToken_dot() {
  var after = input.charCodeAt(tokPos + 1);
  var digitFollows = after >= 48 && after <= 57;
  if (digitFollows) return readNumber(true);
  ++tokPos;
  return finishToken(_dot);
}
michael@0 | 583 | |
// Handles '/': a regexp literal where one is allowed, otherwise `/=` or
// the plain division operator.
function readToken_slash() {
  if (tokRegexpAllowed) {
    ++tokPos;
    return readRegexp();
  }
  var isAssign = input.charCodeAt(tokPos + 1) === 61; // '='
  return isAssign ? finishOp(_assign, 2) : finishOp(_slash, 1);
}
michael@0 | 590 | |
// Handles '%' and '*': either the compound assignment (`%=`, `*=`) or the
// single-character operator.
function readToken_mult_modulo() {
  var followedByEq = input.charCodeAt(tokPos + 1) === 61; // '='
  return followedByEq ? finishOp(_assign, 2) : finishOp(_multiplyModulo, 1);
}
michael@0 | 596 | |
// Handles '|' (124) and '&' (38): the doubled logical operator, the
// compound assignment, or the single bitwise operator.
function readToken_pipe_amp(code) {
  var isPipe = code === 124;
  var after = input.charCodeAt(tokPos + 1);
  if (after === code) return finishOp(isPipe ? _logicalOR : _logicalAND, 2);
  if (after === 61) return finishOp(_assign, 2); // '|=' or '&='
  return finishOp(isPipe ? _bitwiseOR : _bitwiseAND, 1);
}
michael@0 | 603 | |
// Handles '^': either `^=` or the bitwise XOR operator.
function readToken_caret() {
  var followedByEq = input.charCodeAt(tokPos + 1) === 61; // '='
  return followedByEq ? finishOp(_assign, 2) : finishOp(_bitwiseXOR, 1);
}
michael@0 | 609 | |
// Handles '+' (43) and '-' (45): `++`/`--`, `+=`/`-=`, the plain operator,
// or an HTML-style `-->` line comment.
//
// `-->` is only a comment when a line terminator separates it from the end
// of the previous token. In that case the rest of the line is skipped and
// the token after the comment is read and returned instead.
function readToken_plus_min(code) {
  var next = input.charCodeAt(tokPos + 1);
  if (next === code) {
    if (next === 45 && input.charCodeAt(tokPos + 2) === 62 &&
        newline.test(input.slice(lastEnd, tokPos))) {
      // A `-->` line comment. `tokPos` is deliberately NOT advanced past
      // the opener first: skipLineComment performs its own skip, and
      // pre-advancing made it jump two characters beyond `-->`, which
      // could step over the line terminator and swallow the next line.
      // The opener length is passed for implementations that accept it.
      skipLineComment(3);
      skipSpace();
      return readToken();
    }
    return finishOp(_incDec, 2);
  }
  if (next === 61) return finishOp(_assign, 2); // '+=' or '-='
  return finishOp(_plusMin, 1);
}
michael@0 | 626 | |
// Handles '<' (60) and '>' (62): shifts (`<<`, `>>`, `>>>`) and their
// compound assignments, relational operators, or an HTML-style `<!--`
// line comment. For the comment, the rest of the line is skipped and the
// token after it is read and returned instead.
function readToken_lt_gt(code) {
  var next = input.charCodeAt(tokPos + 1);
  var size = 1;
  if (next === code) {
    // '>>>' is the only three-character shift.
    size = code === 62 && input.charCodeAt(tokPos + 2) === 62 ? 3 : 2;
    if (input.charCodeAt(tokPos + size) === 61) return finishOp(_assign, size + 1);
    return finishOp(_bitShift, size);
  }
  if (next === 33 && code === 60 && input.charCodeAt(tokPos + 2) === 45 &&
      input.charCodeAt(tokPos + 3) === 45) {
    // `<!--`, an XML-style comment that should be interpreted as a line
    // comment. `tokPos` is deliberately NOT advanced past the opener
    // first: skipLineComment performs its own skip, and pre-advancing made
    // it jump two characters beyond `<!--`, which could step over the line
    // terminator and swallow the next line. The opener length is passed
    // for implementations that accept it.
    skipLineComment(4);
    skipSpace();
    return readToken();
  }
  if (next === 61)
    size = input.charCodeAt(tokPos + 2) === 61 ? 3 : 2;
  return finishOp(_relational, size);
}
michael@0 | 647 | |
// Handles '=' (61) and '!' (33): an equality operator of two or three
// characters when '=' follows, otherwise plain assignment ('=') or the
// prefix operator ('!').
function readToken_eq_excl(code) {
  var after = input.charCodeAt(tokPos + 1);
  if (after !== 61) {
    return finishOp(code === 61 ? _eq : _prefix, 1);
  }
  var width = input.charCodeAt(tokPos + 2) === 61 ? 3 : 2;
  return finishOp(_equality, width);
}
michael@0 | 653 | |
// Dispatches on the character code at `tokPos` to the reader for the token
// that starts there. Returns `false` when the code does not start any
// punctuation, number, string or operator token; otherwise returns
// whatever the selected reader returns.
function getTokenFromCode(code) {
  // Single-character punctuation: pick the token type, then consume one
  // character.
  var punct;
  switch (code) {
    case 40: punct = _parenL; break;    // '('
    case 41: punct = _parenR; break;    // ')'
    case 59: punct = _semi; break;      // ';'
    case 44: punct = _comma; break;     // ','
    case 91: punct = _bracketL; break;  // '['
    case 93: punct = _bracketR; break;  // ']'
    case 123: punct = _braceL; break;   // '{'
    case 125: punct = _braceR; break;   // '}'
    case 58: punct = _colon; break;     // ':'
    case 63: punct = _question; break;  // '?'
  }
  if (punct !== undefined) {
    ++tokPos;
    return finishToken(punct);
  }

  switch (code) {
    // The interpretation of a dot depends on whether it is followed by a
    // digit.
    case 46: // '.'
      return readToken_dot();

    // '0x' / '0X' is a hexadecimal number.
    case 48: // '0'
      var afterZero = input.charCodeAt(tokPos + 1);
      if (afterZero === 120 || afterZero === 88) return readHexNumber();
      // Otherwise fall through: anything else beginning with a digit is an
      // integer, octal number, or float.
    case 49: case 50: case 51: case 52: case 53:
    case 54: case 55: case 56: case 57: // 1-9
      return readNumber(false);

    // Quotes produce strings.
    case 34: case 39: // '"', "'"
      return readString(code);

    // Operators are read by small dedicated helpers.
    case 47: // '/'
      return readToken_slash(code);
    case 37: case 42: // '%*'
      return readToken_mult_modulo();
    case 124: case 38: // '|&'
      return readToken_pipe_amp(code);
    case 94: // '^'
      return readToken_caret();
    case 43: case 45: // '+-'
      return readToken_plus_min(code);
    case 60: case 62: // '<>'
      return readToken_lt_gt(code);
    case 61: case 33: // '=!'
      return readToken_eq_excl(code);
    case 126: // '~'
      return finishOp(_prefix, 1);
  }

  return false;
}
michael@0 | 718 | |
// Reads the next token starting at `tokPos`.
//   forceRegexp - when truthy, re-reads the current token as a regexp
//                 literal, starting one character after `tokStart`.
// Raises "Unexpected character" when nothing can start at this position.
function readToken(forceRegexp) {
  if (forceRegexp) tokPos = tokStart + 1;
  else tokStart = tokPos;
  if (options.locations) tokStartLoc = new line_loc_t();
  if (forceRegexp) return readRegexp();
  if (tokPos >= inputLen) return finishToken(_eof);

  var code = input.charCodeAt(tokPos);
  // Identifier or keyword. '\uXXXX' sequences are allowed in identifiers,
  // so a backslash is routed to the word reader as well.
  if (isIdentifierStart(code) || code === 92 /* '\' */) return readWord();

  var result = getTokenFromCode(code);
  if (result !== false) return result;

  // Not punctuation, a number, a string or an operator: either a non-ASCII
  // identifier start or a character that is not allowed here.
  var ch = String.fromCharCode(code);
  if (ch === "\\" || nonASCIIidentifierStart.test(ch)) return readWord();
  raise(tokPos, "Unexpected character '" + ch + "'");
  return result;
}
michael@0 | 742 | |
// Finishes an operator token of `size` characters starting at `tokPos`;
// the operator's source text becomes the token value.
function finishOp(type, size) {
  var end = tokPos + size;
  var text = input.slice(tokPos, end);
  tokPos = end;
  finishToken(type, text);
}
michael@0 | 748 | |
michael@0 | 749 | // Parse a regular expression. Some context-awareness is necessary, |
michael@0 | 750 | // since a '/' inside a '[]' set does not end the expression. |
michael@0 | 751 | |
// Reads a regular expression literal whose body starts at `tokPos` (just
// after the opening '/') and finishes a `_regexp` token whose value is the
// compiled RegExp.
//
// Raises "Unterminated regular expression" when the input or the line ends
// before the closing '/', "Invalid regexp flag" for flags outside
// `gmsiy`, and re-raises the engine's SyntaxError message (with position
// information) for an invalid pattern. Any other exception thrown while
// compiling is rethrown unchanged.
function readRegexp() {
  var escaped = false, inClass = false, start = tokPos;
  for (;;) {
    if (tokPos >= inputLen) raise(start, "Unterminated regular expression");
    var ch = input.charAt(tokPos);
    if (newline.test(ch)) raise(start, "Unterminated regular expression");
    if (!escaped) {
      // A '/' inside a '[...]' class does not end the expression.
      if (ch === "[") inClass = true;
      else if (ch === "]" && inClass) inClass = false;
      else if (ch === "/" && !inClass) break;
      escaped = ch === "\\";
    } else escaped = false;
    ++tokPos;
  }
  var content = input.slice(start, tokPos);
  ++tokPos; // closing '/'
  // Need to use `readWord1` because '\uXXXX' sequences are allowed
  // here (don't ask).
  var mods = readWord1();
  if (mods && !/^[gmsiy]*$/.test(mods)) raise(start, "Invalid regexp flag");
  var value;
  try {
    value = new RegExp(content, mods);
  } catch (e) {
    if (e instanceof SyntaxError) raise(start, e.message);
    // `raise` is called as (position, message) everywhere else, so handing
    // it the exception object would misuse that signature. Rethrow as-is.
    throw e;
  }
  return finishToken(_regexp, value);
}
michael@0 | 780 | |
michael@0 | 781 | // Read an integer in the given radix. Return null if zero digits |
michael@0 | 782 | // were read, the integer value otherwise. When `len` is given, this |
michael@0 | 783 | // will return `null` unless the integer has exactly `len` digits. |
michael@0 | 784 | |
// Reads digits of the given `radix` starting at `tokPos` and returns their
// integer value. At most `len` digits are consumed when `len` is given.
// Returns null when no digit was read, or when `len` is given and fewer
// than `len` digits were available. `tokPos` is left after the last digit
// consumed.
function readInt(radix, len) {
  var begin = tokPos, value = 0;
  var limit = len == null ? Infinity : len;
  var count = 0;
  while (count < limit) {
    var c = input.charCodeAt(tokPos);
    var digit = Infinity; // anything that is not a digit/letter stops the scan
    if (c >= 97) digit = c - 97 + 10;             // a
    else if (c >= 65) digit = c - 65 + 10;        // A
    else if (c >= 48 && c <= 57) digit = c - 48;  // 0-9
    if (digit >= radix) break;
    ++tokPos;
    value = value * radix + digit;
    ++count;
  }
  var consumed = tokPos - begin;
  if (consumed === 0) return null;
  if (len != null && consumed !== len) return null;
  return value;
}
michael@0 | 801 | |
// Reads a `0x`/`0X` hexadecimal literal and finishes a `_num` token.
// Raises when no hex digit follows the prefix or when an identifier starts
// directly after the number.
function readHexNumber() {
  tokPos += 2; // 0x
  var value = readInt(16);
  if (value == null) raise(tokStart + 2, "Expected hexadecimal number");
  var following = input.charCodeAt(tokPos);
  if (isIdentifierStart(following)) raise(tokPos, "Identifier directly after number");
  return finishToken(_num, value);
}
michael@0 | 809 | |
michael@0 | 810 | // Read an integer, octal integer, or floating-point number. |
michael@0 | 811 | |
// Reads a decimal integer, legacy octal integer, or floating-point literal
// and finishes a `_num` token.
//   startsWithDot - true when the literal begins with '.', so there is no
//                   integer part to read.
// Raises "Invalid number" for a malformed literal (including a legacy
// octal in strict mode or one containing 8/9) and "Identifier directly
// after number" when an identifier start follows immediately.
function readNumber(startsWithDot) {
  var begin = tokPos;
  var leadingZero = input.charCodeAt(tokPos) === 48;
  var isFloat = false;

  if (!startsWithDot && readInt(10) === null) raise(begin, "Invalid number");

  if (input.charCodeAt(tokPos) === 46) { // '.'
    ++tokPos;
    readInt(10); // fraction digits are optional
    isFloat = true;
  }

  var marker = input.charCodeAt(tokPos);
  if (marker === 69 || marker === 101) { // 'eE'
    ++tokPos;
    var sign = input.charCodeAt(tokPos);
    if (sign === 43 || sign === 45) ++tokPos; // '+-'
    if (readInt(10) === null) raise(begin, "Invalid number");
    isFloat = true;
  }

  if (isIdentifierStart(input.charCodeAt(tokPos))) raise(tokPos, "Identifier directly after number");

  var text = input.slice(begin, tokPos), value;
  if (isFloat) value = parseFloat(text);
  else if (!leadingZero || text.length === 1) value = parseInt(text, 10);
  else if (/[89]/.test(text) || strict) raise(begin, "Invalid number");
  else value = parseInt(text, 8);
  return finishToken(_num, value);
}
michael@0 | 836 | |
michael@0 | 837 | // Read a string value, interpreting backslash-escapes. |
michael@0 | 838 | |
// Reads a string literal and finishes a `_string` token holding the
// decoded value.
//   quote - character code of the opening quote (34 or 39); the literal
//           ends at the next unescaped occurrence of the same code.
// Raises "Unterminated string constant" when the input or the line ends
// first, and "Octal literal in strict mode" for an octal escape while
// `strict` is set.
//
// A backslash followed by a line terminator is a line continuation: it
// adds nothing to the value. That applies to LF, CR, CRLF and also to
// U+2028/U+2029, which skipSpace likewise counts as line breaks.
function readString(quote) {
  tokPos++;
  var out = "";
  for (;;) {
    if (tokPos >= inputLen) raise(tokStart, "Unterminated string constant");
    var ch = input.charCodeAt(tokPos);
    if (ch === quote) {
      ++tokPos;
      return finishToken(_string, out);
    }
    if (ch === 92) { // '\'
      ch = input.charCodeAt(++tokPos);
      // Up to three octal digits, shortened until the value fits in 0-255.
      var octal = /^[0-7]+/.exec(input.slice(tokPos, tokPos + 3));
      if (octal) octal = octal[0];
      while (octal && parseInt(octal, 8) > 255) octal = octal.slice(0, -1);
      // A lone "\0" is the NUL escape, handled by the switch below.
      if (octal === "0") octal = null;
      ++tokPos;
      if (octal) {
        if (strict) raise(tokPos - 2, "Octal literal in strict mode");
        out += String.fromCharCode(parseInt(octal, 8));
        tokPos += octal.length - 1;
      } else {
        switch (ch) {
        case 110: out += "\n"; break; // 'n' -> '\n'
        case 114: out += "\r"; break; // 'r' -> '\r'
        case 120: out += String.fromCharCode(readHexChar(2)); break; // 'x'
        case 117: out += String.fromCharCode(readHexChar(4)); break; // 'u'
        case 85: out += String.fromCharCode(readHexChar(8)); break; // 'U'
        case 116: out += "\t"; break; // 't' -> '\t'
        case 98: out += "\b"; break; // 'b' -> '\b'
        case 118: out += "\u000b"; break; // 'v' -> '\u000b'
        case 102: out += "\f"; break; // 'f' -> '\f'
        case 48: out += "\0"; break; // 0 -> '\0'
        case 13: if (input.charCodeAt(tokPos) === 10) ++tokPos; // '\r\n'
        case 10: // '\n'
        case 8232: case 8233: // U+2028, U+2029
          // Line continuation: contributes no characters, only a new line.
          if (options.locations) { tokLineStart = tokPos; ++tokCurLine; }
          break;
        default: out += String.fromCharCode(ch); break;
        }
      }
    } else {
      // An unescaped line terminator may not appear inside a string.
      if (ch === 13 || ch === 10 || ch === 8232 || ch === 8233) raise(tokStart, "Unterminated string constant");
      out += String.fromCharCode(ch); // ordinary character
      ++tokPos;
    }
  }
}
michael@0 | 886 | |
michael@0 | 887 | // Used to read character escape sequences ('\x', '\u', '\U'). |
michael@0 | 888 | |
// Reads exactly `len` hexadecimal digits and returns their value. Raises
// "Bad character escape sequence" when that many hex digits are not
// available.
function readHexChar(len) {
  var value = readInt(16, len);
  if (value !== null) return value;
  raise(tokStart, "Bad character escape sequence");
  return value;
}
michael@0 | 894 | |
michael@0 | 895 | // Used to signal to callers of `readWord1` whether the word |
michael@0 | 896 | // contained any escape sequences. This is needed because words with |
michael@0 | 897 | // escape sequences must not be interpreted as keywords. |
michael@0 | 898 | |
// Set by `readWord1` to tell its callers whether the word it just read
// contained a '\u' escape.
var containsEsc;

// Reads an identifier starting at `tokPos` and returns it as a string.
// Sets `containsEsc` to whether the word contained a '\u' escape.
//
// As a micro-optimization the word is only built up character by
// character once an escape has been seen; otherwise it is sliced out of
// the input in one go.
function readWord1() {
  containsEsc = false;
  var begin = tokPos, atStart = true, built;
  for (;;) {
    var code = input.charCodeAt(tokPos);
    if (code === 92) { // "\"
      // First escape: seed the buffer with everything read so far.
      if (!containsEsc) built = input.slice(begin, tokPos);
      containsEsc = true;
      ++tokPos;
      if (input.charCodeAt(tokPos) != 117) // "u"
        raise(tokPos, "Expecting Unicode escape sequence \\uXXXX");
      ++tokPos;
      var escCode = readHexChar(4);
      var escChar = String.fromCharCode(escCode);
      if (!escChar) raise(tokPos - 1, "Invalid Unicode escape");
      // The escaped character must itself be valid at this position.
      var valid = atStart ? isIdentifierStart(escCode) : isIdentifierChar(escCode);
      if (!valid) raise(tokPos - 4, "Invalid Unicode escape");
      built += escChar;
    } else if (isIdentifierChar(code)) {
      if (containsEsc) built += input.charAt(tokPos);
      ++tokPos;
    } else {
      break;
    }
    atStart = false;
  }
  return containsEsc ? built : input.slice(begin, tokPos);
}
michael@0 | 934 | |
michael@0 | 935 | // Read an identifier or keyword token. Will check for reserved |
michael@0 | 936 | // words when necessary. |
michael@0 | 937 | |
// Reads an identifier or keyword and finishes the matching token. A word
// that contained an escape is always a plain `_name`. Otherwise keywords
// get their keyword token type, and reserved words raise when
// `options.forbidReserved` is set (using the ECMAScript 3 or 5 list) or
// when strict mode forbids them.
function readWord() {
  var word = readWord1();
  var type = _name;
  if (!containsEsc) {
    if (isKeyword(word)) {
      type = keywordTypes[word];
    } else {
      var reserved = false;
      if (options.forbidReserved) {
        var check = options.ecmaVersion === 3 ? isReservedWord3 : isReservedWord5;
        reserved = check(word);
      }
      if (!reserved && strict) reserved = isStrictReservedWord(word);
      if (reserved) raise(tokStart, "The keyword '" + word + "' is reserved");
    }
  }
  return finishToken(type, word);
}
michael@0 | 950 | |
michael@0 | 951 | // ## Parser |
michael@0 | 952 | |
michael@0 | 953 | // A recursive descent parser operates by defining functions for all |
michael@0 | 954 | // syntactic elements, and recursively calling those, each function |
michael@0 | 955 | // advancing the input stream and returning an AST node. Precedence |
michael@0 | 956 | // of constructs (for example, the fact that `!x[1]` means `!(x[1])` |
michael@0 | 957 | // instead of `(!x)[1]`) is handled by the fact that the parser |
michael@0 | 958 | // function that parses unary prefix operators is called first, and |
michael@0 | 959 | // in turn calls the function that parses `[]` subscripts — that |
michael@0 | 960 | // way, it'll receive the node for `x[1]` already parsed, and wraps |
michael@0 | 961 | // *that* in the unary operator node. |
michael@0 | 962 | // |
michael@0 | 963 | // Acorn uses an [operator precedence parser][opp] to handle binary |
michael@0 | 964 | // operator precedence, because it is much more compact than using |
michael@0 | 965 | // the technique outlined above, which uses different, nesting |
michael@0 | 966 | // functions to specify precedence, for all of the ten binary |
michael@0 | 967 | // precedence levels that JavaScript defines. |
michael@0 | 968 | // |
michael@0 | 969 | // [opp]: http://en.wikipedia.org/wiki/Operator-precedence_parser |
michael@0 | 970 | |
michael@0 | 971 | // ### Parser utilities |
michael@0 | 972 | |
michael@0 | 973 | // Continue to the next token. |
michael@0 | 974 | |
// Remembers where the current token started and ended (offsets and end
// location), then reads the following token.
function next() {
  lastEndLoc = tokEndLoc;
  lastEnd = tokEnd;
  lastStart = tokStart;
  readToken();
}
michael@0 | 981 | |
michael@0 | 982 | // Enter strict mode. Re-reads the next token to please pedantic |
michael@0 | 983 | // tests ("use strict"; 010; -- should fail). |
michael@0 | 984 | |
// Switches strict mode on or off and re-reads the current token under the
// new setting (so that e.g. `"use strict"; 010` fails).
//   strct - the new value for `strict`.
//
// Re-reading rewinds `tokPos` to `tokStart`. With `options.locations` on,
// the line bookkeeping is rewound as well: while the rewound position lies
// before the current line start, step back one line at a time. The
// backwards search recognises every line terminator that skipSpace counts
// (LF, CR, CRLF as one break, U+2028, U+2029), so input that does not use
// "\n" line endings keeps correct locations.
function setStrict(strct) {
  strict = strct;
  tokPos = tokStart;
  if (options.locations) {
    while (tokPos < tokLineStart) {
      // `tokLineStart - 1` is the terminator that ended the previous line;
      // look for the terminator before that one.
      var scan = tokLineStart - 2;
      // Step over the "\r" of a "\r\n" pair so it is not taken for a
      // separate line break.
      if (input.charCodeAt(tokLineStart - 1) === 10 && input.charCodeAt(scan) === 13) --scan;
      while (scan >= 0 && !newline.test(input.charAt(scan))) --scan;
      tokLineStart = scan + 1;
      --tokCurLine;
    }
  }
  skipSpace();
  readToken();
}
michael@0 | 997 | |
michael@0 | 998 | // Constructors for a fresh AST node (`type` and `end` start out null, `start` is the current `tokStart`) and for its `loc` record (`start` is the current `tokStartLoc`, `end` starts out null, and `source` is set only when `sourceFile` is not null). |
michael@0 | 999 | |
michael@0 | 1000 | function node_t() { |
michael@0 | 1001 | this.type = null; |
michael@0 | 1002 | this.start = tokStart; |
michael@0 | 1003 | this.end = null; |
michael@0 | 1004 | } |
michael@0 | 1005 | |
michael@0 | 1006 | function node_loc_t() { |
michael@0 | 1007 | this.start = tokStartLoc; |
michael@0 | 1008 | this.end = null; |
michael@0 | 1009 | if (sourceFile !== null) this.source = sourceFile; |
michael@0 | 1010 | } |
michael@0 | 1011 | |
// Creates an AST node that starts at the current token. Depending on the
// options it also receives a `loc` record, a `sourceFile` property and a
// `range` pair whose end is filled in when the node is finished.
function startNode() {
  var node = new node_t();
  if (options.locations) {
    node.loc = new node_loc_t();
  }
  if (options.directSourceFile) {
    node.sourceFile = options.directSourceFile;
  }
  if (options.ranges) {
    node.range = [tokStart, 0];
  }
  return node;
}
michael@0 | 1022 | |
michael@0 | 1023 | // Start a node whose start offset information should be based on |
michael@0 | 1024 | // the start of another node. For example, a binary operator node is |
michael@0 | 1025 | // only started after its left-hand side has already been parsed. |
michael@0 | 1026 | |
// Creates an AST node whose start information (offset, and `loc`/`range`
// start when those options are on) is copied from `other` instead of the
// current token.
function startNodeFrom(other) {
  var node = new node_t();
  node.start = other.start;
  if (options.locations) {
    var loc = new node_loc_t();
    loc.start = other.loc.start;
    node.loc = loc;
  }
  if (options.ranges) {
    node.range = [other.range[0], 0];
  }
  return node;
}
michael@0 | 1039 | |
michael@0 | 1040 | // Finish an AST node, adding `type` and `end` properties. |
michael@0 | 1041 | |
// Completes `node`: sets its `type` and makes it end where the last
// consumed token ended (offset, plus `loc.end` / `range[1]` when those
// options are on). Returns the same node.
function finishNode(node, type) {
  node.type = type;
  node.end = lastEnd;
  if (options.locations) {
    node.loc.end = lastEndLoc;
  }
  if (options.ranges) {
    node.range[1] = lastEnd;
  }
  return node;
}
michael@0 | 1051 | |
michael@0 | 1052 | // Test whether a statement node is the string literal `"use strict"`. |
michael@0 | 1053 | |
// Returns true when `stmt` is an expression statement consisting of the
// literal "use strict" and `options.ecmaVersion` is at least 5.
function isUseStrict(stmt) {
  if (!(options.ecmaVersion >= 5)) return false;
  if (stmt.type !== "ExpressionStatement") return false;
  var expr = stmt.expression;
  return expr.type === "Literal" && expr.value === "use strict";
}
michael@0 | 1058 | |
michael@0 | 1059 | // Predicate that tests whether the next token is of the given |
michael@0 | 1060 | // type, and if yes, consumes it as a side effect. |
michael@0 | 1061 | |
// Consumes the current token when it is of the given `type`.
// Returns true when the token was consumed, false otherwise (the miss case
// previously fell off the end and returned undefined).
function eat(type) {
  if (tokType === type) {
    next();
    return true;
  }
  return false;
}
michael@0 | 1068 | |
michael@0 | 1069 | // Test whether a semicolon can be inserted at the current position. |
michael@0 | 1070 | |
// Returns true when a semicolon may be implied here: never with
// `options.strictSemicolons`; otherwise at end of input, before a closing
// brace, or when a line terminator lies between the previous token and
// the current one.
function canInsertSemicolon() {
  if (options.strictSemicolons) return false;
  if (tokType === _eof || tokType === _braceR) return true;
  return newline.test(input.slice(lastEnd, tokStart));
}
michael@0 | 1075 | |
michael@0 | 1076 | // Consume a semicolon, or, failing that, see if we are allowed to |
michael@0 | 1077 | // pretend that there is a semicolon at this position. |
michael@0 | 1078 | |
// Accepts either a real semicolon (which is consumed) or a position where
// one may be implied; anything else is an unexpected token.
function semicolon() {
  if (eat(_semi)) return;
  if (canInsertSemicolon()) return;
  unexpected();
}
michael@0 | 1082 | |
michael@0 | 1083 | // Expect a token of a given type. If found, consume it, otherwise, |
michael@0 | 1084 | // raise an unexpected token error. |
michael@0 | 1085 | |
// Demand that the current token has type `type`. On a match the
// token is consumed with `next()`; on a mismatch `unexpected()` is
// called and nothing is consumed.

function expect(type) {
  if (tokType !== type) unexpected();
  else next();
}
michael@0 | 1090 | |
michael@0 | 1091 | // Raise an unexpected token error. |
michael@0 | 1092 | |
// Report the current token as a syntax error by passing its start
// offset (`tokStart`) and a fixed message to `raise`.

function unexpected() {
  var message = "Unexpected token";
  raise(tokStart, message);
}
michael@0 | 1096 | |
michael@0 | 1097 | // Verify that a node is an lval — something that can be assigned |
michael@0 | 1098 | // to. |
michael@0 | 1099 | |
// Validate `expr` as an assignment target. Only Identifier and
// MemberExpression nodes are accepted; in strict mode an Identifier
// for which `isStrictBadIdWord` answers true is rejected as well.
// Violations are reported through `raise` at `expr.start`.

function checkLVal(expr) {
  var isIdentifier = expr.type === "Identifier";
  if (!isIdentifier && expr.type !== "MemberExpression")
    raise(expr.start, "Assigning to rvalue");
  if (isIdentifier && strict && isStrictBadIdWord(expr.name))
    raise(expr.start, "Assigning to " + expr.name + " in strict mode");
}
michael@0 | 1106 | |
michael@0 | 1107 | // ### Statement parsing |
michael@0 | 1108 | |
michael@0 | 1109 | // Parse a program. Initializes the parser, reads any number of |
michael@0 | 1110 | // statements, and wraps them in a Program node. Optionally takes a |
michael@0 | 1111 | // `program` argument. If present, the statements will be appended |
michael@0 | 1112 | // to its body instead of creating a new node. |
michael@0 | 1113 | |
// Parse an entire program. Resets the position bookkeeping
// (`lastStart`, `lastEnd`, `lastEndLoc`), the `inFunction` / `strict`
// flags and the label stack, reads the first token, then collects
// statements until end of input.
//
// `program` (optional): an existing node whose `body` receives the
// statements; without it a new node with an empty `body` is started.
// Returns the node finished as "Program". If the first statement is
// a `"use strict"` directive, `setStrict(true)` is called.

function parseTopLevel(program) {
  lastEnd = tokPos;
  lastStart = lastEnd;
  if (options.locations) lastEndLoc = new line_loc_t();
  strict = null;
  inFunction = null;
  labels = [];
  readToken();

  var root;
  if (program) {
    root = program;
  } else {
    root = startNode();
    root.body = [];
  }
  for (var isFirst = true; tokType !== _eof; isFirst = false) {
    var statement = parseStatement();
    root.body.push(statement);
    if (isFirst && isUseStrict(statement)) setStrict(true);
  }
  return finishNode(root, "Program");
}
michael@0 | 1131 | |
// Shared entries pushed onto the `labels` stack for unlabeled loops
// and switches; `kind` tells `break` / `continue` what they may target.
var loopLabel = {kind: "loop"};
var switchLabel = {kind: "switch"};
michael@0 | 1133 | |
michael@0 | 1134 | // Parse a single statement. |
michael@0 | 1135 | // |
michael@0 | 1136 | // If expecting a statement and finding a slash operator, parse a |
michael@0 | 1137 | // regular expression literal. This is to handle cases like |
michael@0 | 1138 | // `if (foo) /blah/.exec(foo);`, where looking at the previous token |
michael@0 | 1139 | // does not help. |
michael@0 | 1140 | |
// Parse one statement starting at the current token and return its
// finished AST node. Dispatches on the type of the first token;
// anything that is not a statement keyword, `{` or `;` is handed to
// `parseLabeledOrExpressionStatement`. The longer statement forms
// live in the private helpers defined after this function.

function parseStatement() {
  // A `/` or `/=` in statement position can only start a regular
  // expression, so the token is re-read via `readToken(true)`.
  if (tokType === _slash || tokType === _assign && tokVal == "/=")
    readToken(true);

  var starttype = tokType, node = startNode();

  switch (starttype) {
  case _break: case _continue:
    return parseBreakContinueStatement(node, starttype);

  case _debugger:
    next();
    semicolon();
    return finishNode(node, "DebuggerStatement");

  case _do:
    next();
    labels.push(loopLabel);
    node.body = parseStatement();
    labels.pop();
    expect(_while);
    node.test = parseParenExpression();
    semicolon();
    return finishNode(node, "DoWhileStatement");

  case _for:
    return parseForStatement(node);

  case _function:
    next();
    return parseFunction(node, true);

  case _if:
    next();
    node.test = parseParenExpression();
    node.consequent = parseStatement();
    node.alternate = eat(_else) ? parseStatement() : null;
    return finishNode(node, "IfStatement");

  case _return:
    if (!inFunction) raise(tokStart, "'return' outside of function");
    next();

    // `return` has an optional argument, so a semicolon (real or
    // insertable) is looked for eagerly before parsing one.
    if (eat(_semi) || canInsertSemicolon()) node.argument = null;
    else { node.argument = parseExpression(); semicolon(); }
    return finishNode(node, "ReturnStatement");

  case _switch:
    return parseSwitchStatement(node);

  case _throw:
    next();
    if (newline.test(input.slice(lastEnd, tokStart)))
      raise(lastEnd, "Illegal newline after throw");
    node.argument = parseExpression();
    semicolon();
    return finishNode(node, "ThrowStatement");

  case _try:
    return parseTryStatement(node);

  case _var:
    next();
    parseVar(node);
    semicolon();
    return finishNode(node, "VariableDeclaration");

  case _while:
    next();
    node.test = parseParenExpression();
    labels.push(loopLabel);
    node.body = parseStatement();
    labels.pop();
    return finishNode(node, "WhileStatement");

  case _with:
    if (strict) raise(tokStart, "'with' in strict mode");
    next();
    node.object = parseParenExpression();
    node.body = parseStatement();
    return finishNode(node, "WithStatement");

  case _braceL:
    return parseBlock();

  case _semi:
    next();
    return finishNode(node, "EmptyStatement");

  default:
    return parseLabeledOrExpressionStatement(node, starttype);
  }
}

// Parse `break` / `continue` with its optional label, then verify
// that the `labels` stack holds a valid target for it.

function parseBreakContinueStatement(node, starttype) {
  next();
  var isBreak = starttype === _break;
  if (eat(_semi) || canInsertSemicolon()) node.label = null;
  else if (tokType !== _name) unexpected();
  else {
    node.label = parseIdent();
    semicolon();
  }

  // The loop exits early (leaving `i` short of `labels.length`) as
  // soon as an acceptable destination is found.
  for (var i = 0; i < labels.length; ++i) {
    var lab = labels[i];
    if (node.label == null || lab.name === node.label.name) {
      if (lab.kind != null && (isBreak || lab.kind === "loop")) break;
      if (node.label && isBreak) break;
    }
  }
  if (i === labels.length) raise(node.start, "Unsyntactic " + starttype.keyword);
  return finishNode(node, isBreak ? "BreakStatement" : "ContinueStatement");
}

// Parse the head of a `for` statement and decide between a regular
// `for` and a `for`/`in` loop. The init part is parsed with the `in`
// operator disallowed; if `in` follows, it is a `for`/`in` loop. The
// loop label pushed here is popped by `parseFor` / `parseForIn`.

function parseForStatement(node) {
  next();
  labels.push(loopLabel);
  expect(_parenL);
  if (tokType === _semi) return parseFor(node, null);
  if (tokType === _var) {
    var declaration = startNode();
    next();
    parseVar(declaration, true);
    finishNode(declaration, "VariableDeclaration");
    if (declaration.declarations.length === 1 && eat(_in))
      return parseForIn(node, declaration);
    return parseFor(node, declaration);
  }
  var init = parseExpression(false, true);
  if (eat(_in)) { checkLVal(init); return parseForIn(node, init); }
  return parseFor(node, init);
}

// Parse a `switch` statement. Statements that follow a `case` or
// `default` label are collected into that label's SwitchCase node;
// `cur` is the SwitchCase currently being filled.

function parseSwitchStatement(node) {
  next();
  node.discriminant = parseParenExpression();
  node.cases = [];
  expect(_braceL);
  labels.push(switchLabel);

  for (var cur, sawDefault; tokType !== _braceR;) {
    if (tokType === _case || tokType === _default) {
      var isCase = tokType === _case;
      if (cur) finishNode(cur, "SwitchCase");
      node.cases.push(cur = startNode());
      cur.consequent = [];
      next();
      if (isCase) cur.test = parseExpression();
      else {
        if (sawDefault) raise(lastStart, "Multiple default clauses");
        sawDefault = true;
        cur.test = null;
      }
      expect(_colon);
    } else {
      // A statement before any `case` / `default` label is an error.
      if (!cur) unexpected();
      cur.consequent.push(parseStatement());
    }
  }
  if (cur) finishNode(cur, "SwitchCase");
  next(); // Closing brace
  labels.pop();
  return finishNode(node, "SwitchStatement");
}

// Parse a `try` statement with its optional `catch` clause and
// `finally` block; at least one of the two must be present.

function parseTryStatement(node) {
  next();
  node.block = parseBlock();
  node.handler = null;
  if (tokType === _catch) {
    var clause = startNode();
    next();
    expect(_parenL);
    clause.param = parseIdent();
    if (strict && isStrictBadIdWord(clause.param.name))
      raise(clause.param.start, "Binding " + clause.param.name + " in strict mode");
    expect(_parenR);
    clause.guard = null;
    clause.body = parseBlock();
    node.handler = finishNode(clause, "CatchClause");
  }
  node.guardedHandlers = empty;
  node.finalizer = eat(_finally) ? parseBlock() : null;
  if (!node.handler && !node.finalizer)
    raise(node.start, "Missing catch or finally clause");
  return finishNode(node, "TryStatement");
}

// Parse an expression; if it was a lone identifier and a colon
// follows, reinterpret it as the label of a LabeledStatement,
// otherwise finish it as an ExpressionStatement.

function parseLabeledOrExpressionStatement(node, starttype) {
  var maybeName = tokVal, expr = parseExpression();
  if (starttype === _name && expr.type === "Identifier" && eat(_colon)) {
    for (var i = 0; i < labels.length; ++i)
      if (labels[i].name === maybeName) raise(expr.start, "Label '" + maybeName + "' is already declared");
    // Record what the label is attached to, so `break` / `continue`
    // can be validated against it.
    var kind = tokType.isLoop ? "loop" : tokType === _switch ? "switch" : null;
    labels.push({name: maybeName, kind: kind});
    node.body = parseStatement();
    labels.pop();
    node.label = expr;
    return finishNode(node, "LabeledStatement");
  }
  node.expression = expr;
  semicolon();
  return finishNode(node, "ExpressionStatement");
}
michael@0 | 1354 | |
michael@0 | 1355 | // Used for constructs like `switch` and `if` that insist on |
michael@0 | 1356 | // parentheses around their expression. |
michael@0 | 1357 | |
// Parse `( expression )` and return the node for the inner
// expression; both parentheses are mandatory.

function parseParenExpression() {
  expect(_parenL);
  var inner = parseExpression();
  expect(_parenR);
  return inner;
}
michael@0 | 1364 | |
michael@0 | 1365 | // Parse a brace-enclosed block of statements, handling `"use |
michael@0 | 1366 | // strict"` declarations when `allowStrict` is true (used for |
michael@0 | 1367 | // function bodies). |
michael@0 | 1368 | |
// Parse `{ statement* }` and return it as a BlockStatement node.
//
// `allowStrict`: when true (function bodies), a `"use strict"`
// directive as the first statement switches the parser into strict
// mode for the rest of the block.
//
// Strict mode is switched back off at the closing brace only if this
// block is the one that turned it on. The parser-wide `strict` flag
// is read directly here; it must not be shadowed by a local of the
// same name, or a strict function nested in already-strict code
// would disable strict mode for the enclosing code on exit.

function parseBlock(allowStrict) {
  var node = startNode(), first = true, enteredStrict = false;
  node.body = [];
  expect(_braceL);
  while (!eat(_braceR)) {
    var stmt = parseStatement();
    node.body.push(stmt);
    if (first && allowStrict && isUseStrict(stmt)) {
      // Remember whether the surrounding code was non-strict before
      // the directive takes effect.
      enteredStrict = !strict;
      setStrict(true);
    }
    first = false;
  }
  if (enteredStrict) setStrict(false);
  return finishNode(node, "BlockStatement");
}
michael@0 | 1385 | |
michael@0 | 1386 | // Parse a regular `for` loop. The disambiguation code in |
michael@0 | 1387 | // `parseStatement` will already have parsed the init statement or |
michael@0 | 1388 | // expression. |
michael@0 | 1389 | |
// Finish a regular `for (init; test; update) body` loop. `init` has
// already been parsed by the caller (null when absent); parsing
// resumes at the first `;`. An empty test or update clause is
// recorded as null. Pops the loop label that the caller pushed and
// returns `node` finished as a ForStatement.

function parseFor(node, init) {
  node.init = init;
  expect(_semi);
  if (tokType === _semi) node.test = null;
  else node.test = parseExpression();
  expect(_semi);
  if (tokType === _parenR) node.update = null;
  else node.update = parseExpression();
  expect(_parenR);
  node.body = parseStatement();
  labels.pop();
  return finishNode(node, "ForStatement");
}
michael@0 | 1401 | |
michael@0 | 1402 | // Parse a `for`/`in` loop. |
michael@0 | 1403 | |
// Finish a `for (left in right) body` loop. `init` (the already
// parsed left-hand side) becomes `node.left`; the `in` keyword has
// been consumed by the caller. Pops the loop label that the caller
// pushed and returns `node` finished as a ForInStatement.

function parseForIn(node, init) {
  node.left = init;
  var iterated = parseExpression();
  node.right = iterated;
  expect(_parenR);
  var loopBody = parseStatement();
  node.body = loopBody;
  labels.pop();
  return finishNode(node, "ForInStatement");
}
michael@0 | 1412 | |
michael@0 | 1413 | // Parse a list of variable declarations. |
michael@0 | 1414 | |
// Parse the comma-separated declarators that follow a `var` keyword
// (already consumed) into `node.declarations`, set `node.kind` to
// "var", and return `node` (not yet finished).
//
// `noIn` is forwarded to `parseExpression` so that initializers in a
// `for` head do not swallow the `in` keyword. In strict mode, names
// rejected by `isStrictBadIdWord` are reported through `raise`.

function parseVar(node, noIn) {
  var declarators = node.declarations = [];
  node.kind = "var";
  do {
    var declarator = startNode();
    var id = declarator.id = parseIdent();
    if (strict && isStrictBadIdWord(id.name))
      raise(id.start, "Binding " + id.name + " in strict mode");
    declarator.init = eat(_eq) ? parseExpression(true, noIn) : null;
    declarators.push(finishNode(declarator, "VariableDeclarator"));
  } while (eat(_comma));
  return node;
}
michael@0 | 1429 | |
michael@0 | 1430 | // ### Expression parsing |
michael@0 | 1431 | |
michael@0 | 1432 | // These nest, from the most general expression type at the top to |
michael@0 | 1433 | // 'atomic', nondivisible expression types at the bottom. Most of |
michael@0 | 1434 | // the functions will simply let the function(s) below them parse, |
michael@0 | 1435 | // and, *if* the syntactic construct they handle is present, wrap |
michael@0 | 1436 | // the AST node that the inner parser gave them in another node. |
michael@0 | 1437 | |
michael@0 | 1438 | // Parse a full expression. The arguments are used to forbid comma |
michael@0 | 1439 | // sequences (in argument lists, array literals, or object literals) |
michael@0 | 1440 | // or the `in` operator (in for-loop initialization expressions). |
michael@0 | 1441 | |
// Parse an expression. With `noComma` set, parsing stops at the
// first assignment-level expression; otherwise a comma-separated run
// is wrapped in a SequenceExpression. `noIn` is forwarded to
// `parseMaybeAssign`.

function parseExpression(noComma, noIn) {
  var first = parseMaybeAssign(noIn);
  if (noComma || tokType !== _comma) return first;

  var sequence = startNodeFrom(first);
  var items = sequence.expressions = [first];
  while (eat(_comma)) items.push(parseMaybeAssign(noIn));
  return finishNode(sequence, "SequenceExpression");
}
michael@0 | 1452 | |
michael@0 | 1453 | // Parse an assignment expression. This includes applications of |
michael@0 | 1454 | // operators like `+=`. |
michael@0 | 1455 | |
// Parse a conditional-level expression and, if an assignment
// operator follows, wrap it in an AssignmentExpression whose right
// side is parsed recursively. The left side is validated with
// `checkLVal` only after the right side has been parsed.

function parseMaybeAssign(noIn) {
  var target = parseMaybeConditional(noIn);
  if (!tokType.isAssign) return target;

  var assignment = startNodeFrom(target);
  assignment.operator = tokVal;
  assignment.left = target;
  next();
  assignment.right = parseMaybeAssign(noIn);
  checkLVal(target);
  return finishNode(assignment, "AssignmentExpression");
}
michael@0 | 1469 | |
michael@0 | 1470 | // Parse a ternary conditional (`?:`) operator. |
michael@0 | 1471 | |
// Parse a binary-operator expression and, if `?` follows, the two
// branches of a ConditionalExpression. `noIn` applies to the test
// and to the alternate branch, but not to the consequent.

function parseMaybeConditional(noIn) {
  var test = parseExprOps(noIn);
  if (!eat(_question)) return test;

  var conditional = startNodeFrom(test);
  conditional.test = test;
  conditional.consequent = parseExpression(true);
  expect(_colon);
  conditional.alternate = parseExpression(true, noIn);
  return finishNode(conditional, "ConditionalExpression");
}
michael@0 | 1484 | |
michael@0 | 1485 | // Start the precedence parser. |
michael@0 | 1486 | |
// Parse one unary-level operand and hand it to `parseExprOp` with a
// minimum precedence of -1.

function parseExprOps(noIn) {
  var operand = parseMaybeUnary();
  return parseExprOp(operand, -1, noIn);
}
michael@0 | 1490 | |
michael@0 | 1491 | // Parse binary operators with the operator precedence parsing |
michael@0 | 1492 | // algorithm. `left` is the left-hand side of the operator. |
michael@0 | 1493 | // `minPrec` provides context that allows the function to stop and |
michael@0 | 1494 | // defer further parsing to one of its callers when it encounters an |
michael@0 | 1495 | // operator that has a lower precedence than the set it is parsing. |
michael@0 | 1496 | |
// Fold binary operators onto `left` for as long as the current token
// is a binary operator whose precedence exceeds `minPrec`, and
// return the resulting tree. With `noIn` set, the `in` keyword is
// not treated as an operator. `||` and `&&` produce
// LogicalExpression nodes, every other operator a BinaryExpression.

function parseExprOp(left, minPrec, noIn) {
  for (;;) {
    var prec = tokType.binop;
    if (prec == null) return left;
    if (noIn && tokType === _in) return left;
    if (!(prec > minPrec)) return left;

    var node = startNodeFrom(left);
    node.left = left;
    node.operator = tokVal;
    var op = tokType;
    next();
    // The right operand absorbs any operators that bind tighter.
    node.right = parseExprOp(parseMaybeUnary(), prec, noIn);
    var isLogical = op === _logicalOR || op === _logicalAND;
    left = finishNode(node, isLogical ? "LogicalExpression" : "BinaryExpression");
  }
}
michael@0 | 1513 | |
michael@0 | 1514 | // Parse unary operators, both prefix and postfix. |
michael@0 | 1515 | |
// Parse an operand together with any prefix operator in front of it
// or postfix operators behind it. Update operators have their
// operand validated with `checkLVal`; in strict mode `delete`
// applied to a bare identifier is reported through `raise`. A
// postfix operator is not attached where `canInsertSemicolon()`
// says a semicolon could be inserted before it.

function parseMaybeUnary() {
  var node;
  if (tokType.prefix) {
    node = startNode();
    var isUpdate = tokType.isUpdate;
    node.operator = tokVal;
    node.prefix = true;
    tokRegexpAllowed = true;
    next();
    node.argument = parseMaybeUnary();
    if (isUpdate) {
      checkLVal(node.argument);
    } else if (strict && node.operator === "delete" &&
               node.argument.type === "Identifier") {
      raise(node.start, "Deleting local variable in strict mode");
    }
    return finishNode(node, isUpdate ? "UpdateExpression" : "UnaryExpression");
  }

  var operand = parseExprSubscripts();
  while (tokType.postfix && !canInsertSemicolon()) {
    node = startNodeFrom(operand);
    node.operator = tokVal;
    node.prefix = false;
    node.argument = operand;
    checkLVal(operand);
    next();
    operand = finishNode(node, "UpdateExpression");
  }
  return operand;
}
michael@0 | 1542 | |
michael@0 | 1543 | // Parse call, dot, and `[]`-subscript expressions. |
michael@0 | 1544 | |
// Parse an atomic expression followed by any number of member
// accesses and calls.

function parseExprSubscripts() {
  var atom = parseExprAtom();
  return parseSubscripts(atom);
}
michael@0 | 1548 | |
// Repeatedly extend `base` with `.name`, `[expr]` and, unless
// `noCalls` is set, `(args)` suffixes, and return the outermost
// node. Returns `base` unchanged when no suffix follows.

function parseSubscripts(base, noCalls) {
  for (;;) {
    var node;
    if (eat(_dot)) {
      node = startNodeFrom(base);
      node.object = base;
      node.property = parseIdent(true);
      node.computed = false;
      base = finishNode(node, "MemberExpression");
    } else if (eat(_bracketL)) {
      node = startNodeFrom(base);
      node.object = base;
      node.property = parseExpression();
      node.computed = true;
      expect(_bracketR);
      base = finishNode(node, "MemberExpression");
    } else if (!noCalls && eat(_parenL)) {
      node = startNodeFrom(base);
      node.callee = base;
      node.arguments = parseExprList(_parenR, false);
      base = finishNode(node, "CallExpression");
    } else {
      return base;
    }
  }
}
michael@0 | 1570 | |
michael@0 | 1571 | // Parse an atomic expression — either a single token that is an |
michael@0 | 1572 | // expression, an expression started by a keyword like `function` or |
michael@0 | 1573 | // `new`, or an expression wrapped in punctuation like `()`, `[]`, |
michael@0 | 1574 | // or `{}`. |
michael@0 | 1575 | |
// Parse the smallest kind of expression, chosen by the current
// token: `this`, an identifier, a literal, a parenthesized
// expression, an array or object literal, a function expression or
// a `new` expression. Any other token is reported through
// `unexpected()`.

function parseExprAtom() {
  var type = tokType, node;

  if (type === _this) {
    node = startNode();
    next();
    return finishNode(node, "ThisExpression");
  }

  if (type === _name) return parseIdent();

  if (type === _num || type === _string || type === _regexp) {
    node = startNode();
    node.value = tokVal;
    node.raw = input.slice(tokStart, tokEnd);
    next();
    return finishNode(node, "Literal");
  }

  if (type === _null || type === _true || type === _false) {
    node = startNode();
    node.value = type.atomValue;
    node.raw = type.keyword;
    next();
    return finishNode(node, "Literal");
  }

  if (type === _parenL) {
    // No node is created for the parentheses; instead the inner
    // expression's position data is overwritten with positions taken
    // at the `(` token and at the token current once the inner
    // expression has been parsed.
    var openLoc = tokStartLoc, openPos = tokStart;
    next();
    var inner = parseExpression();
    inner.start = openPos;
    inner.end = tokEnd;
    if (options.locations) {
      inner.loc.start = openLoc;
      inner.loc.end = tokEndLoc;
    }
    if (options.ranges)
      inner.range = [openPos, tokEnd];
    expect(_parenR);
    return inner;
  }

  if (type === _bracketL) {
    node = startNode();
    next();
    node.elements = parseExprList(_bracketR, true, true);
    return finishNode(node, "ArrayExpression");
  }

  if (type === _braceL) return parseObj();

  if (type === _function) {
    node = startNode();
    next();
    return parseFunction(node, false);
  }

  if (type === _new) return parseNew();

  unexpected();
}
michael@0 | 1634 | |
michael@0 | 1635 | // New's precedence is slightly tricky. It must allow its argument |
michael@0 | 1636 | // to be a `[]` or dot subscript expression, but not a call (unless |
michael@0 | 1637 | // parenthesized). Thus, it passes `noCalls` to `parseSubscripts`. |
michael@0 | 1638 | |
// Parse a `new` expression. The callee is parsed with calls
// disabled, so a following argument list belongs to the `new`
// itself; without one, `arguments` is set to the shared `empty`
// value.

function parseNew() {
  var node = startNode();
  next();
  var constructor = parseSubscripts(parseExprAtom(), true);
  node.callee = constructor;
  node.arguments = eat(_parenL) ? parseExprList(_parenR, false) : empty;
  return finishNode(node, "NewExpression");
}
michael@0 | 1647 | |
michael@0 | 1648 | // Parse an object literal. |
michael@0 | 1649 | |
// Parse an object literal into an ObjectExpression node. Each
// property is a plain object with `key`, `value` and `kind` ("init",
// "get" or "set"); accessors are recognized only when
// `options.ecmaVersion` is at least 5. A trailing comma is accepted
// when `options.allowTrailingCommas` is set.

function parseObj() {
  var node = startNode(), sawGetSet = false;
  node.properties = [];
  next();
  for (var first = true; !eat(_braceR); first = false) {
    if (!first) {
      expect(_comma);
      if (options.allowTrailingCommas && eat(_braceR)) break;
    }

    var prop = {key: parsePropertyName()}, isGetSet = false, kind;
    if (eat(_colon)) {
      prop.value = parseExpression(true);
      kind = prop.kind = "init";
    } else if (options.ecmaVersion >= 5 && prop.key.type === "Identifier" &&
               (prop.key.name === "get" || prop.key.name === "set")) {
      // `get` / `set` was not followed by a colon, so it introduces an
      // accessor: the real key comes next, followed by the function.
      isGetSet = sawGetSet = true;
      kind = prop.kind = prop.key.name;
      prop.key = parsePropertyName();
      if (tokType !== _parenL) unexpected();
      prop.value = parseFunction(startNode(), false);
    } else unexpected();

    // Accessors may not clash with each other or with an init
    // property; in strict mode init properties may not repeat either.
    // Only identifier keys are compared.
    if (prop.key.type === "Identifier" && (strict || sawGetSet)) {
      for (var i = 0; i < node.properties.length; ++i) {
        var other = node.properties[i];
        if (other.key.name !== prop.key.name) continue;
        var sameKind = kind == other.kind;
        var accessorOverInit = isGetSet && other.kind === "init";
        var initOverAccessor = kind === "init" &&
          (other.kind === "get" || other.kind === "set");
        var conflict = sameKind || accessorOverInit || initOverAccessor;
        if (conflict && !strict && kind === "init" && other.kind === "init") conflict = false;
        if (conflict) raise(prop.key.start, "Redefinition of property");
      }
    }
    node.properties.push(prop);
  }
  return finishNode(node, "ObjectExpression");
}
michael@0 | 1692 | |
// Parse an object-literal key: a number or string token is parsed
// as a literal, anything else as an identifier in liberal mode.

function parsePropertyName() {
  var isLiteralKey = tokType === _num || tokType === _string;
  return isLiteralKey ? parseExprAtom() : parseIdent(true);
}
michael@0 | 1697 | |
michael@0 | 1698 | // Parse a function declaration or literal (depending on the |
michael@0 | 1699 | // `isStatement` parameter). |
michael@0 | 1700 | |
// Parse the remainder of a function after the `function` keyword:
// optional name, parameter list and body. `node` is the node to
// fill in; `isStatement` selects FunctionDeclaration (a name is then
// mandatory) over FunctionExpression. Returns the finished node.
//
// The body is parsed with a fresh label stack and `inFunction` set;
// both are restored afterwards. If the function is strict (inherited
// or through its own `"use strict"` directive), its name and
// parameters are checked for reserved words and duplicate names.

function parseFunction(node, isStatement) {
  if (tokType === _name) node.id = parseIdent();
  else if (isStatement) unexpected();
  else node.id = null;

  var params = node.params = [];
  expect(_parenL);
  for (var first = true; !eat(_parenR); first = false) {
    if (!first) expect(_comma);
    params.push(parseIdent());
  }

  var outerInFunction = inFunction, outerLabels = labels;
  inFunction = true;
  labels = [];
  node.body = parseBlock(true);
  inFunction = outerInFunction;
  labels = outerLabels;

  // The body's own directive has to be re-checked here, in addition
  // to the current `strict` flag.
  var statements = node.body.body;
  if (strict || statements.length && isUseStrict(statements[0])) {
    // Index -1 stands for the function's own name, when it has one.
    for (var i = node.id ? -1 : 0; i < params.length; ++i) {
      var id = i < 0 ? node.id : params[i];
      if (isStrictReservedWord(id.name) || isStrictBadIdWord(id.name))
        raise(id.start, "Defining '" + id.name + "' in strict mode");
      if (i < 0) continue;
      for (var j = 0; j < i; ++j) {
        if (id.name === params[j].name)
          raise(id.start, "Argument name clash in strict mode");
      }
    }
  }

  return finishNode(node, isStatement ? "FunctionDeclaration" : "FunctionExpression");
}
michael@0 | 1735 | |
michael@0 | 1736 | // Parses a comma-separated list of expressions, and returns them as |
michael@0 | 1737 | // an array. `close` is the token type that ends the list, and |
michael@0 | 1738 | // `allowEmpty` can be turned on to allow subsequent commas with |
michael@0 | 1739 | // nothing in between them to be parsed as `null` (which is needed |
michael@0 | 1740 | // for array literals). |
michael@0 | 1741 | |
// Parse comma-separated expressions up to and including the `close`
// token, and return them as an array.
//
// `allowTrailingComma`: permit a comma directly before `close`
// (only honoured when `options.allowTrailingCommas` is also set).
// `allowEmpty`: record an element omitted between commas as `null`.

function parseExprList(close, allowTrailingComma, allowEmpty) {
  var items = [];
  for (var first = true; !eat(close); first = false) {
    if (!first) {
      expect(_comma);
      if (allowTrailingComma && options.allowTrailingCommas && eat(close)) break;
    }
    var isHole = allowEmpty && tokType === _comma;
    items.push(isHole ? null : parseExpression(true));
  }
  return items;
}
michael@0 | 1755 | |
michael@0 | 1756 | // Parse the next token as an identifier. If `liberal` is true (used |
michael@0 | 1757 | // when parsing properties), it will also convert keywords into |
michael@0 | 1758 | // identifiers. |
michael@0 | 1759 | |
// Consume the current token as an Identifier node. A name token
// supplies its value; with `liberal` set and `options.forbidReserved`
// unset, a keyword token supplies its keyword text instead. Any
// other token is reported through `unexpected()`. Clears
// `tokRegexpAllowed` before moving to the next token.

function parseIdent(liberal) {
  var node = startNode();
  var name;
  if (tokType === _name) {
    name = tokVal;
  } else {
    name = liberal && !options.forbidReserved && tokType.keyword;
    if (!name) name = unexpected();
  }
  node.name = name;
  tokRegexpAllowed = false;
  next();
  return finishNode(node, "Identifier");
}
michael@0 | 1767 | |
michael@0 | 1768 | }); |