|
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /** |
|
7 * Lexical analyzer for XPath expressions |
|
8 */ |
|
9 |
|
10 #include "txExprLexer.h" |
|
11 #include "nsGkAtoms.h" |
|
12 #include "nsString.h" |
|
13 #include "nsError.h" |
|
14 #include "txXMLUtils.h" |
|
15 |
|
16 /** |
|
17 * Creates a new ExprLexer |
|
18 */ |
|
19 txExprLexer::txExprLexer() |
|
20 : mCurrentItem(nullptr), |
|
21 mFirstItem(nullptr), |
|
22 mLastItem(nullptr), |
|
23 mTokenCount(0) |
|
24 { |
|
25 } |
|
26 |
|
27 /** |
|
28 * Destroys this instance of an txExprLexer |
|
29 */ |
|
30 txExprLexer::~txExprLexer() |
|
31 { |
|
32 //-- delete tokens |
|
33 Token* tok = mFirstItem; |
|
34 while (tok) { |
|
35 Token* temp = tok->mNext; |
|
36 delete tok; |
|
37 tok = temp; |
|
38 } |
|
39 mCurrentItem = nullptr; |
|
40 } |
|
41 |
|
42 Token* |
|
43 txExprLexer::nextToken() |
|
44 { |
|
45 if (!mCurrentItem) { |
|
46 NS_NOTREACHED("nextToken called on uninitialized lexer"); |
|
47 return nullptr; |
|
48 } |
|
49 |
|
50 if (mCurrentItem->mType == Token::END) { |
|
51 // Do not progress beyond the end token |
|
52 return mCurrentItem; |
|
53 } |
|
54 |
|
55 Token* token = mCurrentItem; |
|
56 mCurrentItem = mCurrentItem->mNext; |
|
57 return token; |
|
58 } |
|
59 |
|
60 void |
|
61 txExprLexer::addToken(Token* aToken) |
|
62 { |
|
63 if (mLastItem) { |
|
64 mLastItem->mNext = aToken; |
|
65 } |
|
66 if (!mFirstItem) { |
|
67 mFirstItem = aToken; |
|
68 mCurrentItem = aToken; |
|
69 } |
|
70 mLastItem = aToken; |
|
71 ++mTokenCount; |
|
72 } |
|
73 |
|
74 /** |
|
75 * Returns true if the following Token should be an operator. |
|
76 * This is a helper for the first bullet of [XPath 3.7] |
|
77 * Lexical Structure |
|
78 */ |
|
79 bool |
|
80 txExprLexer::nextIsOperatorToken(Token* aToken) |
|
81 { |
|
82 if (!aToken || aToken->mType == Token::NULL_TOKEN) { |
|
83 return false; |
|
84 } |
|
85 /* This relies on the tokens having the right order in txExprLexer.h */ |
|
86 return aToken->mType < Token::COMMA || |
|
87 aToken->mType > Token::UNION_OP; |
|
88 |
|
89 } |
|
90 |
|
91 /** |
|
92 * Parses the given string into a sequence of Tokens |
|
93 */ |
|
94 nsresult |
|
95 txExprLexer::parse(const nsASingleFragmentString& aPattern) |
|
96 { |
|
97 iterator start, end; |
|
98 start = aPattern.BeginReading(mPosition); |
|
99 aPattern.EndReading(end); |
|
100 |
|
101 //-- initialize previous token, this will automatically get |
|
102 //-- deleted when it goes out of scope |
|
103 Token nullToken(nullptr, nullptr, Token::NULL_TOKEN); |
|
104 |
|
105 Token::Type defType; |
|
106 Token* newToken = nullptr; |
|
107 Token* prevToken = &nullToken; |
|
108 bool isToken; |
|
109 |
|
110 while (mPosition < end) { |
|
111 |
|
112 defType = Token::CNAME; |
|
113 isToken = true; |
|
114 |
|
115 if (*mPosition == DOLLAR_SIGN) { |
|
116 if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) { |
|
117 return NS_ERROR_XPATH_INVALID_VAR_NAME; |
|
118 } |
|
119 defType = Token::VAR_REFERENCE; |
|
120 } |
|
121 // just reuse the QName parsing, which will use defType |
|
122 // the token to construct |
|
123 |
|
124 if (XMLUtils::isLetter(*mPosition)) { |
|
125 // NCName, can get QName or OperatorName; |
|
126 // FunctionName, NodeName, and AxisSpecifier may want whitespace, |
|
127 // and are dealt with below |
|
128 start = mPosition; |
|
129 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { |
|
130 /* just go */ |
|
131 } |
|
132 if (mPosition < end && *mPosition == COLON) { |
|
133 // try QName or wildcard, might need to step back for axis |
|
134 if (++mPosition == end) { |
|
135 return NS_ERROR_XPATH_UNEXPECTED_END; |
|
136 } |
|
137 if (XMLUtils::isLetter(*mPosition)) { |
|
138 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { |
|
139 /* just go */ |
|
140 } |
|
141 } |
|
142 else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) { |
|
143 // eat wildcard for NameTest, bail for var ref at COLON |
|
144 ++mPosition; |
|
145 } |
|
146 else { |
|
147 --mPosition; // step back |
|
148 } |
|
149 } |
|
150 if (nextIsOperatorToken(prevToken)) { |
|
151 nsDependentSubstring op(Substring(start, mPosition)); |
|
152 if (nsGkAtoms::_and->Equals(op)) { |
|
153 defType = Token::AND_OP; |
|
154 } |
|
155 else if (nsGkAtoms::_or->Equals(op)) { |
|
156 defType = Token::OR_OP; |
|
157 } |
|
158 else if (nsGkAtoms::mod->Equals(op)) { |
|
159 defType = Token::MODULUS_OP; |
|
160 } |
|
161 else if (nsGkAtoms::div->Equals(op)) { |
|
162 defType = Token::DIVIDE_OP; |
|
163 } |
|
164 else { |
|
165 // XXX QUESTION: spec is not too precise |
|
166 // badops is sure an error, but is bad:ops, too? We say yes! |
|
167 return NS_ERROR_XPATH_OPERATOR_EXPECTED; |
|
168 } |
|
169 } |
|
170 newToken = new Token(start, mPosition, defType); |
|
171 } |
|
172 else if (isXPathDigit(*mPosition)) { |
|
173 start = mPosition; |
|
174 while (++mPosition < end && isXPathDigit(*mPosition)) { |
|
175 /* just go */ |
|
176 } |
|
177 if (mPosition < end && *mPosition == '.') { |
|
178 while (++mPosition < end && isXPathDigit(*mPosition)) { |
|
179 /* just go */ |
|
180 } |
|
181 } |
|
182 newToken = new Token(start, mPosition, Token::NUMBER); |
|
183 } |
|
184 else { |
|
185 switch (*mPosition) { |
|
186 //-- ignore whitespace |
|
187 case SPACE: |
|
188 case TX_TAB: |
|
189 case TX_CR: |
|
190 case TX_LF: |
|
191 ++mPosition; |
|
192 isToken = false; |
|
193 break; |
|
194 case S_QUOTE : |
|
195 case D_QUOTE : |
|
196 start = mPosition; |
|
197 while (++mPosition < end && *mPosition != *start) { |
|
198 // eat literal |
|
199 } |
|
200 if (mPosition == end) { |
|
201 mPosition = start; |
|
202 return NS_ERROR_XPATH_UNCLOSED_LITERAL; |
|
203 } |
|
204 newToken = new Token(start + 1, mPosition, Token::LITERAL); |
|
205 ++mPosition; |
|
206 break; |
|
207 case PERIOD: |
|
208 // period can be .., .(DIGITS)+ or ., check next |
|
209 if (++mPosition == end) { |
|
210 newToken = new Token(mPosition - 1, Token::SELF_NODE); |
|
211 } |
|
212 else if (isXPathDigit(*mPosition)) { |
|
213 start = mPosition - 1; |
|
214 while (++mPosition < end && isXPathDigit(*mPosition)) { |
|
215 /* just go */ |
|
216 } |
|
217 newToken = new Token(start, mPosition, Token::NUMBER); |
|
218 } |
|
219 else if (*mPosition == PERIOD) { |
|
220 ++mPosition; |
|
221 newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE); |
|
222 } |
|
223 else { |
|
224 newToken = new Token(mPosition - 1, Token::SELF_NODE); |
|
225 } |
|
226 break; |
|
227 case COLON: // QNames are dealt above, must be axis ident |
|
228 if (++mPosition >= end || *mPosition != COLON || |
|
229 prevToken->mType != Token::CNAME) { |
|
230 return NS_ERROR_XPATH_BAD_COLON; |
|
231 } |
|
232 prevToken->mType = Token::AXIS_IDENTIFIER; |
|
233 ++mPosition; |
|
234 isToken = false; |
|
235 break; |
|
236 case FORWARD_SLASH : |
|
237 if (++mPosition < end && *mPosition == FORWARD_SLASH) { |
|
238 ++mPosition; |
|
239 newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP); |
|
240 } |
|
241 else { |
|
242 newToken = new Token(mPosition - 1, Token::PARENT_OP); |
|
243 } |
|
244 break; |
|
245 case BANG : // can only be != |
|
246 if (++mPosition < end && *mPosition == EQUAL) { |
|
247 ++mPosition; |
|
248 newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP); |
|
249 break; |
|
250 } |
|
251 // Error ! is not not() |
|
252 return NS_ERROR_XPATH_BAD_BANG; |
|
253 case EQUAL: |
|
254 newToken = new Token(mPosition, Token::EQUAL_OP); |
|
255 ++mPosition; |
|
256 break; |
|
257 case L_ANGLE: |
|
258 if (++mPosition == end) { |
|
259 return NS_ERROR_XPATH_UNEXPECTED_END; |
|
260 } |
|
261 if (*mPosition == EQUAL) { |
|
262 ++mPosition; |
|
263 newToken = new Token(mPosition - 2, mPosition, |
|
264 Token::LESS_OR_EQUAL_OP); |
|
265 } |
|
266 else { |
|
267 newToken = new Token(mPosition - 1, Token::LESS_THAN_OP); |
|
268 } |
|
269 break; |
|
270 case R_ANGLE: |
|
271 if (++mPosition == end) { |
|
272 return NS_ERROR_XPATH_UNEXPECTED_END; |
|
273 } |
|
274 if (*mPosition == EQUAL) { |
|
275 ++mPosition; |
|
276 newToken = new Token(mPosition - 2, mPosition, |
|
277 Token::GREATER_OR_EQUAL_OP); |
|
278 } |
|
279 else { |
|
280 newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP); |
|
281 } |
|
282 break; |
|
283 case HYPHEN : |
|
284 newToken = new Token(mPosition, Token::SUBTRACTION_OP); |
|
285 ++mPosition; |
|
286 break; |
|
287 case ASTERIX: |
|
288 if (nextIsOperatorToken(prevToken)) { |
|
289 newToken = new Token(mPosition, Token::MULTIPLY_OP); |
|
290 } |
|
291 else { |
|
292 newToken = new Token(mPosition, Token::CNAME); |
|
293 } |
|
294 ++mPosition; |
|
295 break; |
|
296 case L_PAREN: |
|
297 if (prevToken->mType == Token::CNAME) { |
|
298 const nsDependentSubstring& val = prevToken->Value(); |
|
299 if (val.EqualsLiteral("comment")) { |
|
300 prevToken->mType = Token::COMMENT_AND_PAREN; |
|
301 } |
|
302 else if (val.EqualsLiteral("node")) { |
|
303 prevToken->mType = Token::NODE_AND_PAREN; |
|
304 } |
|
305 else if (val.EqualsLiteral("processing-instruction")) { |
|
306 prevToken->mType = Token::PROC_INST_AND_PAREN; |
|
307 } |
|
308 else if (val.EqualsLiteral("text")) { |
|
309 prevToken->mType = Token::TEXT_AND_PAREN; |
|
310 } |
|
311 else { |
|
312 prevToken->mType = Token::FUNCTION_NAME_AND_PAREN; |
|
313 } |
|
314 isToken = false; |
|
315 } |
|
316 else { |
|
317 newToken = new Token(mPosition, Token::L_PAREN); |
|
318 } |
|
319 ++mPosition; |
|
320 break; |
|
321 case R_PAREN: |
|
322 newToken = new Token(mPosition, Token::R_PAREN); |
|
323 ++mPosition; |
|
324 break; |
|
325 case L_BRACKET: |
|
326 newToken = new Token(mPosition, Token::L_BRACKET); |
|
327 ++mPosition; |
|
328 break; |
|
329 case R_BRACKET: |
|
330 newToken = new Token(mPosition, Token::R_BRACKET); |
|
331 ++mPosition; |
|
332 break; |
|
333 case COMMA: |
|
334 newToken = new Token(mPosition, Token::COMMA); |
|
335 ++mPosition; |
|
336 break; |
|
337 case AT_SIGN : |
|
338 newToken = new Token(mPosition, Token::AT_SIGN); |
|
339 ++mPosition; |
|
340 break; |
|
341 case PLUS: |
|
342 newToken = new Token(mPosition, Token::ADDITION_OP); |
|
343 ++mPosition; |
|
344 break; |
|
345 case VERT_BAR: |
|
346 newToken = new Token(mPosition, Token::UNION_OP); |
|
347 ++mPosition; |
|
348 break; |
|
349 default: |
|
350 // Error, don't grok character :-( |
|
351 return NS_ERROR_XPATH_ILLEGAL_CHAR; |
|
352 } |
|
353 } |
|
354 if (isToken) { |
|
355 NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY); |
|
356 NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE); |
|
357 prevToken = newToken; |
|
358 addToken(newToken); |
|
359 } |
|
360 } |
|
361 |
|
362 // add a endToken to the list |
|
363 newToken = new Token(end, end, Token::END); |
|
364 if (!newToken) { |
|
365 return NS_ERROR_OUT_OF_MEMORY; |
|
366 } |
|
367 addToken(newToken); |
|
368 |
|
369 return NS_OK; |
|
370 } |