Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
michael@0 | 2 | /* This Source Code Form is subject to the terms of the Mozilla Public |
michael@0 | 3 | * License, v. 2.0. If a copy of the MPL was not distributed with this |
michael@0 | 4 | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
michael@0 | 5 | |
michael@0 | 6 | |
michael@0 | 7 | #ifndef MITREXSL_EXPRLEXER_H |
michael@0 | 8 | #define MITREXSL_EXPRLEXER_H |
michael@0 | 9 | |
michael@0 | 10 | #include "txCore.h" |
michael@0 | 11 | #include "nsString.h" |
michael@0 | 12 | |
michael@0 | 13 | /** |
michael@0 | 14 | * A Token class for the ExprLexer. |
michael@0 | 15 | * |
michael@0 | 16 | * This class was ported from XSL:P, an open source Java based |
michael@0 | 17 | * XSLT processor, written by yours truly. |
michael@0 | 18 | */ |
michael@0 | 19 | class Token |
michael@0 | 20 | { |
michael@0 | 21 | public: |
michael@0 | 22 | |
michael@0 | 23 | /** |
michael@0 | 24 | * Token types |
michael@0 | 25 | */ |
michael@0 | 26 | enum Type { |
michael@0 | 27 | //-- Trivial Tokens |
michael@0 | 28 | NULL_TOKEN = 1, |
michael@0 | 29 | LITERAL, |
michael@0 | 30 | NUMBER, |
michael@0 | 31 | CNAME, |
michael@0 | 32 | VAR_REFERENCE, |
michael@0 | 33 | PARENT_NODE, |
michael@0 | 34 | SELF_NODE, |
michael@0 | 35 | R_PAREN, |
michael@0 | 36 | R_BRACKET, // 9 |
michael@0 | 37 | /** |
michael@0 | 38 | * start of tokens for 3.7, bullet 1 |
michael@0 | 39 | * ExprLexer::nextIsOperatorToken bails if the tokens aren't |
michael@0 | 40 | * consecutive. |
michael@0 | 41 | */ |
michael@0 | 42 | COMMA, |
michael@0 | 43 | AT_SIGN, |
michael@0 | 44 | L_PAREN, |
michael@0 | 45 | L_BRACKET, |
michael@0 | 46 | AXIS_IDENTIFIER, |
michael@0 | 47 | |
michael@0 | 48 | // These tokens include their following left parenthesis |
michael@0 | 49 | FUNCTION_NAME_AND_PAREN, // 15 |
michael@0 | 50 | COMMENT_AND_PAREN, |
michael@0 | 51 | NODE_AND_PAREN, |
michael@0 | 52 | PROC_INST_AND_PAREN, |
michael@0 | 53 | TEXT_AND_PAREN, |
michael@0 | 54 | |
michael@0 | 55 | /** |
michael@0 | 56 | * operators |
michael@0 | 57 | */ |
michael@0 | 58 | //-- boolean ops |
michael@0 | 59 | AND_OP, // 20 |
michael@0 | 60 | OR_OP, |
michael@0 | 61 | |
michael@0 | 62 | //-- relational |
michael@0 | 63 | EQUAL_OP, // 22 |
michael@0 | 64 | NOT_EQUAL_OP, |
michael@0 | 65 | LESS_THAN_OP, |
michael@0 | 66 | GREATER_THAN_OP, |
michael@0 | 67 | LESS_OR_EQUAL_OP, |
michael@0 | 68 | GREATER_OR_EQUAL_OP, |
michael@0 | 69 | //-- additive operators |
michael@0 | 70 | ADDITION_OP, // 28 |
michael@0 | 71 | SUBTRACTION_OP, |
michael@0 | 72 | //-- multiplicative |
michael@0 | 73 | DIVIDE_OP, // 30 |
michael@0 | 74 | MULTIPLY_OP, |
michael@0 | 75 | MODULUS_OP, |
michael@0 | 76 | //-- path operators |
michael@0 | 77 | PARENT_OP, // 33 |
michael@0 | 78 | ANCESTOR_OP, |
michael@0 | 79 | UNION_OP, |
michael@0 | 80 | /** |
michael@0 | 81 | * end of tokens for 3.7, bullet 1 -/ |
michael@0 | 82 | */ |
michael@0 | 83 | //-- Special endtoken |
michael@0 | 84 | END // 36 |
michael@0 | 85 | }; |
michael@0 | 86 | |
michael@0 | 87 | |
michael@0 | 88 | /** |
michael@0 | 89 | * Constructors |
michael@0 | 90 | */ |
michael@0 | 91 | typedef nsASingleFragmentString::const_char_iterator iterator; |
michael@0 | 92 | |
michael@0 | 93 | Token(iterator aStart, iterator aEnd, Type aType) |
michael@0 | 94 | : mStart(aStart), |
michael@0 | 95 | mEnd(aEnd), |
michael@0 | 96 | mType(aType), |
michael@0 | 97 | mNext(nullptr) |
michael@0 | 98 | { |
michael@0 | 99 | } |
michael@0 | 100 | Token(iterator aChar, Type aType) |
michael@0 | 101 | : mStart(aChar), |
michael@0 | 102 | mEnd(aChar + 1), |
michael@0 | 103 | mType(aType), |
michael@0 | 104 | mNext(nullptr) |
michael@0 | 105 | { |
michael@0 | 106 | } |
michael@0 | 107 | |
michael@0 | 108 | const nsDependentSubstring Value() |
michael@0 | 109 | { |
michael@0 | 110 | return Substring(mStart, mEnd); |
michael@0 | 111 | } |
michael@0 | 112 | |
michael@0 | 113 | iterator mStart, mEnd; |
michael@0 | 114 | Type mType; |
michael@0 | 115 | Token* mNext; |
michael@0 | 116 | }; |
michael@0 | 117 | |
michael@0 | 118 | /** |
michael@0 | 119 | * A class for splitting an "Expr" String into tokens and |
michael@0 | 120 | * performing basic Lexical Analysis. |
michael@0 | 121 | * |
michael@0 | 122 | * This class was ported from XSL:P, an open source Java based XSL processor |
michael@0 | 123 | */ |
michael@0 | 124 | |
michael@0 | 125 | class txExprLexer |
michael@0 | 126 | { |
michael@0 | 127 | public: |
michael@0 | 128 | |
michael@0 | 129 | txExprLexer(); |
michael@0 | 130 | ~txExprLexer(); |
michael@0 | 131 | |
michael@0 | 132 | /** |
michael@0 | 133 | * Parse the given string. |
michael@0 | 134 | * returns an error result if lexing failed. |
michael@0 | 135 | * The given string must outlive the use of the lexer, as the |
michael@0 | 136 | * generated Tokens point to Substrings of it. |
michael@0 | 137 | * mPosition points to the offending location in case of an error. |
michael@0 | 138 | */ |
michael@0 | 139 | nsresult parse(const nsASingleFragmentString& aPattern); |
michael@0 | 140 | |
michael@0 | 141 | typedef nsASingleFragmentString::const_char_iterator iterator; |
michael@0 | 142 | iterator mPosition; |
michael@0 | 143 | |
michael@0 | 144 | /** |
michael@0 | 145 | * Functions for iterating over the TokenList |
michael@0 | 146 | */ |
michael@0 | 147 | |
michael@0 | 148 | Token* nextToken(); |
michael@0 | 149 | Token* peek() |
michael@0 | 150 | { |
michael@0 | 151 | NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer"); |
michael@0 | 152 | return mCurrentItem; |
michael@0 | 153 | } |
michael@0 | 154 | Token* peekAhead() |
michael@0 | 155 | { |
michael@0 | 156 | NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer"); |
michael@0 | 157 | // Don't peek past the end node |
michael@0 | 158 | return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext : mCurrentItem; |
michael@0 | 159 | } |
michael@0 | 160 | bool hasMoreTokens() |
michael@0 | 161 | { |
michael@0 | 162 | NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer"); |
michael@0 | 163 | return (mCurrentItem && mCurrentItem->mType != Token::END); |
michael@0 | 164 | } |
michael@0 | 165 | |
michael@0 | 166 | /** |
michael@0 | 167 | * Trivial Tokens |
michael@0 | 168 | */ |
michael@0 | 169 | //-- LF, changed to enum |
michael@0 | 170 | enum _TrivialTokens { |
michael@0 | 171 | D_QUOTE = '\"', |
michael@0 | 172 | S_QUOTE = '\'', |
michael@0 | 173 | L_PAREN = '(', |
michael@0 | 174 | R_PAREN = ')', |
michael@0 | 175 | L_BRACKET = '[', |
michael@0 | 176 | R_BRACKET = ']', |
michael@0 | 177 | L_ANGLE = '<', |
michael@0 | 178 | R_ANGLE = '>', |
michael@0 | 179 | COMMA = ',', |
michael@0 | 180 | PERIOD = '.', |
michael@0 | 181 | ASTERIX = '*', |
michael@0 | 182 | FORWARD_SLASH = '/', |
michael@0 | 183 | EQUAL = '=', |
michael@0 | 184 | BANG = '!', |
michael@0 | 185 | VERT_BAR = '|', |
michael@0 | 186 | AT_SIGN = '@', |
michael@0 | 187 | DOLLAR_SIGN = '$', |
michael@0 | 188 | PLUS = '+', |
michael@0 | 189 | HYPHEN = '-', |
michael@0 | 190 | COLON = ':', |
michael@0 | 191 | //-- whitespace tokens |
michael@0 | 192 | SPACE = ' ', |
michael@0 | 193 | TX_TAB = '\t', |
michael@0 | 194 | TX_CR = '\n', |
michael@0 | 195 | TX_LF = '\r' |
michael@0 | 196 | }; |
michael@0 | 197 | |
michael@0 | 198 | private: |
michael@0 | 199 | |
michael@0 | 200 | Token* mCurrentItem; |
michael@0 | 201 | Token* mFirstItem; |
michael@0 | 202 | Token* mLastItem; |
michael@0 | 203 | |
michael@0 | 204 | int mTokenCount; |
michael@0 | 205 | |
michael@0 | 206 | void addToken(Token* aToken); |
michael@0 | 207 | |
michael@0 | 208 | /** |
michael@0 | 209 | * Returns true if the following Token should be an operator. |
michael@0 | 210 | * This is a helper for the first bullet of [XPath 3.7] |
michael@0 | 211 | * Lexical Structure |
michael@0 | 212 | */ |
michael@0 | 213 | bool nextIsOperatorToken(Token* aToken); |
michael@0 | 214 | |
michael@0 | 215 | /** |
michael@0 | 216 | * Returns true if the given character represents a numeric letter (digit) |
michael@0 | 217 | * Implemented in ExprLexerChars.cpp |
michael@0 | 218 | */ |
michael@0 | 219 | static bool isXPathDigit(char16_t ch) |
michael@0 | 220 | { |
michael@0 | 221 | return (ch >= '0' && ch <= '9'); |
michael@0 | 222 | } |
michael@0 | 223 | }; |
michael@0 | 224 | |
michael@0 | 225 | #endif |
michael@0 | 226 |