|
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 |
|
7 #ifndef MITREXSL_EXPRLEXER_H |
|
8 #define MITREXSL_EXPRLEXER_H |
|
9 |
|
10 #include "txCore.h" |
|
11 #include "nsString.h" |
|
12 |
|
13 /** |
|
14 * A Token class for the ExprLexer. |
|
15 * |
|
16 * This class was ported from XSL:P, an open source Java based |
|
17 * XSLT processor, written by yours truly. |
|
18 */ |
|
19 class Token |
|
20 { |
|
21 public: |
|
22 |
|
23 /** |
|
24 * Token types |
|
25 */ |
|
26 enum Type { |
|
27 //-- Trivial Tokens |
|
28 NULL_TOKEN = 1, |
|
29 LITERAL, |
|
30 NUMBER, |
|
31 CNAME, |
|
32 VAR_REFERENCE, |
|
33 PARENT_NODE, |
|
34 SELF_NODE, |
|
35 R_PAREN, |
|
36 R_BRACKET, // 9 |
|
37 /** |
|
38 * start of tokens for 3.7, bullet 1 |
|
39 * ExprLexer::nextIsOperatorToken bails if the tokens aren't |
|
40 * consecutive. |
|
41 */ |
|
42 COMMA, |
|
43 AT_SIGN, |
|
44 L_PAREN, |
|
45 L_BRACKET, |
|
46 AXIS_IDENTIFIER, |
|
47 |
|
48 // These tokens include their following left parenthesis |
|
49 FUNCTION_NAME_AND_PAREN, // 15 |
|
50 COMMENT_AND_PAREN, |
|
51 NODE_AND_PAREN, |
|
52 PROC_INST_AND_PAREN, |
|
53 TEXT_AND_PAREN, |
|
54 |
|
55 /** |
|
56 * operators |
|
57 */ |
|
58 //-- boolean ops |
|
59 AND_OP, // 20 |
|
60 OR_OP, |
|
61 |
|
62 //-- relational |
|
63 EQUAL_OP, // 22 |
|
64 NOT_EQUAL_OP, |
|
65 LESS_THAN_OP, |
|
66 GREATER_THAN_OP, |
|
67 LESS_OR_EQUAL_OP, |
|
68 GREATER_OR_EQUAL_OP, |
|
69 //-- additive operators |
|
70 ADDITION_OP, // 28 |
|
71 SUBTRACTION_OP, |
|
72 //-- multiplicative |
|
73 DIVIDE_OP, // 30 |
|
74 MULTIPLY_OP, |
|
75 MODULUS_OP, |
|
76 //-- path operators |
|
77 PARENT_OP, // 33 |
|
78 ANCESTOR_OP, |
|
79 UNION_OP, |
|
80 /** |
|
81 * end of tokens for 3.7, bullet 1 -/ |
|
82 */ |
|
83 //-- Special endtoken |
|
84 END // 36 |
|
85 }; |
|
86 |
|
87 |
|
88 /** |
|
89 * Constructors |
|
90 */ |
|
91 typedef nsASingleFragmentString::const_char_iterator iterator; |
|
92 |
|
93 Token(iterator aStart, iterator aEnd, Type aType) |
|
94 : mStart(aStart), |
|
95 mEnd(aEnd), |
|
96 mType(aType), |
|
97 mNext(nullptr) |
|
98 { |
|
99 } |
|
100 Token(iterator aChar, Type aType) |
|
101 : mStart(aChar), |
|
102 mEnd(aChar + 1), |
|
103 mType(aType), |
|
104 mNext(nullptr) |
|
105 { |
|
106 } |
|
107 |
|
108 const nsDependentSubstring Value() |
|
109 { |
|
110 return Substring(mStart, mEnd); |
|
111 } |
|
112 |
|
113 iterator mStart, mEnd; |
|
114 Type mType; |
|
115 Token* mNext; |
|
116 }; |
|
117 |
|
118 /** |
|
119 * A class for splitting an "Expr" String into tokens and |
|
120 * performing basic Lexical Analysis. |
|
121 * |
|
122 * This class was ported from XSL:P, an open source Java based XSL processor |
|
123 */ |
|
124 |
|
125 class txExprLexer |
|
126 { |
|
127 public: |
|
128 |
|
129 txExprLexer(); |
|
130 ~txExprLexer(); |
|
131 |
|
132 /** |
|
133 * Parse the given string. |
|
134 * returns an error result if lexing failed. |
|
135 * The given string must outlive the use of the lexer, as the |
|
136 * generated Tokens point to Substrings of it. |
|
137 * mPosition points to the offending location in case of an error. |
|
138 */ |
|
139 nsresult parse(const nsASingleFragmentString& aPattern); |
|
140 |
|
141 typedef nsASingleFragmentString::const_char_iterator iterator; |
|
142 iterator mPosition; |
|
143 |
|
144 /** |
|
145 * Functions for iterating over the TokenList |
|
146 */ |
|
147 |
|
148 Token* nextToken(); |
|
149 Token* peek() |
|
150 { |
|
151 NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer"); |
|
152 return mCurrentItem; |
|
153 } |
|
154 Token* peekAhead() |
|
155 { |
|
156 NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer"); |
|
157 // Don't peek past the end node |
|
158 return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext : mCurrentItem; |
|
159 } |
|
160 bool hasMoreTokens() |
|
161 { |
|
162 NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer"); |
|
163 return (mCurrentItem && mCurrentItem->mType != Token::END); |
|
164 } |
|
165 |
|
166 /** |
|
167 * Trivial Tokens |
|
168 */ |
|
169 //-- LF, changed to enum |
|
170 enum _TrivialTokens { |
|
171 D_QUOTE = '\"', |
|
172 S_QUOTE = '\'', |
|
173 L_PAREN = '(', |
|
174 R_PAREN = ')', |
|
175 L_BRACKET = '[', |
|
176 R_BRACKET = ']', |
|
177 L_ANGLE = '<', |
|
178 R_ANGLE = '>', |
|
179 COMMA = ',', |
|
180 PERIOD = '.', |
|
181 ASTERIX = '*', |
|
182 FORWARD_SLASH = '/', |
|
183 EQUAL = '=', |
|
184 BANG = '!', |
|
185 VERT_BAR = '|', |
|
186 AT_SIGN = '@', |
|
187 DOLLAR_SIGN = '$', |
|
188 PLUS = '+', |
|
189 HYPHEN = '-', |
|
190 COLON = ':', |
|
191 //-- whitespace tokens |
|
192 SPACE = ' ', |
|
193 TX_TAB = '\t', |
|
194 TX_CR = '\n', |
|
195 TX_LF = '\r' |
|
196 }; |
|
197 |
|
198 private: |
|
199 |
|
200 Token* mCurrentItem; |
|
201 Token* mFirstItem; |
|
202 Token* mLastItem; |
|
203 |
|
204 int mTokenCount; |
|
205 |
|
206 void addToken(Token* aToken); |
|
207 |
|
208 /** |
|
209 * Returns true if the following Token should be an operator. |
|
210 * This is a helper for the first bullet of [XPath 3.7] |
|
211 * Lexical Structure |
|
212 */ |
|
213 bool nextIsOperatorToken(Token* aToken); |
|
214 |
|
215 /** |
|
216 * Returns true if the given character represents a numeric letter (digit) |
|
217 * Implemented in ExprLexerChars.cpp |
|
218 */ |
|
219 static bool isXPathDigit(char16_t ch) |
|
220 { |
|
221 return (ch >= '0' && ch <= '9'); |
|
222 } |
|
223 }; |
|
224 |
|
225 #endif |
|
226 |