1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/dom/xslt/xpath/txExprLexer.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,370 @@ 1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/** 1.10 + * Lexical analyzer for XPath expressions 1.11 + */ 1.12 + 1.13 +#include "txExprLexer.h" 1.14 +#include "nsGkAtoms.h" 1.15 +#include "nsString.h" 1.16 +#include "nsError.h" 1.17 +#include "txXMLUtils.h" 1.18 + 1.19 +/** 1.20 + * Creates a new ExprLexer 1.21 + */ 1.22 +txExprLexer::txExprLexer() 1.23 + : mCurrentItem(nullptr), 1.24 + mFirstItem(nullptr), 1.25 + mLastItem(nullptr), 1.26 + mTokenCount(0) 1.27 +{ 1.28 +} 1.29 + 1.30 +/** 1.31 + * Destroys this instance of an txExprLexer 1.32 + */ 1.33 +txExprLexer::~txExprLexer() 1.34 +{ 1.35 + //-- delete tokens 1.36 + Token* tok = mFirstItem; 1.37 + while (tok) { 1.38 + Token* temp = tok->mNext; 1.39 + delete tok; 1.40 + tok = temp; 1.41 + } 1.42 + mCurrentItem = nullptr; 1.43 +} 1.44 + 1.45 +Token* 1.46 +txExprLexer::nextToken() 1.47 +{ 1.48 + if (!mCurrentItem) { 1.49 + NS_NOTREACHED("nextToken called on uninitialized lexer"); 1.50 + return nullptr; 1.51 + } 1.52 + 1.53 + if (mCurrentItem->mType == Token::END) { 1.54 + // Do not progress beyond the end token 1.55 + return mCurrentItem; 1.56 + } 1.57 + 1.58 + Token* token = mCurrentItem; 1.59 + mCurrentItem = mCurrentItem->mNext; 1.60 + return token; 1.61 +} 1.62 + 1.63 +void 1.64 +txExprLexer::addToken(Token* aToken) 1.65 +{ 1.66 + if (mLastItem) { 1.67 + mLastItem->mNext = aToken; 1.68 + } 1.69 + if (!mFirstItem) { 1.70 + mFirstItem = aToken; 1.71 + mCurrentItem = aToken; 1.72 + } 1.73 + mLastItem = aToken; 1.74 + ++mTokenCount; 1.75 +} 1.76 + 1.77 +/** 1.78 + * Returns true if the following Token should be an operator. 1.79 + * This is a helper for the first bullet of [XPath 3.7] 1.80 + * Lexical Structure 1.81 + */ 1.82 +bool 1.83 +txExprLexer::nextIsOperatorToken(Token* aToken) 1.84 +{ 1.85 + if (!aToken || aToken->mType == Token::NULL_TOKEN) { 1.86 + return false; 1.87 + } 1.88 + /* This relies on the tokens having the right order in txExprLexer.h */ 1.89 + return aToken->mType < Token::COMMA || 1.90 + aToken->mType > Token::UNION_OP; 1.91 + 1.92 +} 1.93 + 1.94 +/** 1.95 + * Parses the given string into a sequence of Tokens 1.96 + */ 1.97 +nsresult 1.98 +txExprLexer::parse(const nsASingleFragmentString& aPattern) 1.99 +{ 1.100 + iterator start, end; 1.101 + start = aPattern.BeginReading(mPosition); 1.102 + aPattern.EndReading(end); 1.103 + 1.104 + //-- initialize previous token, this will automatically get 1.105 + //-- deleted when it goes out of scope 1.106 + Token nullToken(nullptr, nullptr, Token::NULL_TOKEN); 1.107 + 1.108 + Token::Type defType; 1.109 + Token* newToken = nullptr; 1.110 + Token* prevToken = &nullToken; 1.111 + bool isToken; 1.112 + 1.113 + while (mPosition < end) { 1.114 + 1.115 + defType = Token::CNAME; 1.116 + isToken = true; 1.117 + 1.118 + if (*mPosition == DOLLAR_SIGN) { 1.119 + if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) { 1.120 + return NS_ERROR_XPATH_INVALID_VAR_NAME; 1.121 + } 1.122 + defType = Token::VAR_REFERENCE; 1.123 + } 1.124 + // just reuse the QName parsing, which will use defType 1.125 + // the token to construct 1.126 + 1.127 + if (XMLUtils::isLetter(*mPosition)) { 1.128 + // NCName, can get QName or OperatorName; 1.129 + // FunctionName, NodeName, and AxisSpecifier may want whitespace, 1.130 + // and are dealt with below 1.131 + start = mPosition; 1.132 + while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { 1.133 + /* just go */ 1.134 + } 1.135 + if (mPosition < end && *mPosition == COLON) { 1.136 + // try QName or wildcard, might need to step back for axis 1.137 + if (++mPosition == end) { 1.138 + return NS_ERROR_XPATH_UNEXPECTED_END; 1.139 + } 1.140 + if (XMLUtils::isLetter(*mPosition)) { 1.141 + while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { 1.142 + /* just go */ 1.143 + } 1.144 + } 1.145 + else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) { 1.146 + // eat wildcard for NameTest, bail for var ref at COLON 1.147 + ++mPosition; 1.148 + } 1.149 + else { 1.150 + --mPosition; // step back 1.151 + } 1.152 + } 1.153 + if (nextIsOperatorToken(prevToken)) { 1.154 + nsDependentSubstring op(Substring(start, mPosition)); 1.155 + if (nsGkAtoms::_and->Equals(op)) { 1.156 + defType = Token::AND_OP; 1.157 + } 1.158 + else if (nsGkAtoms::_or->Equals(op)) { 1.159 + defType = Token::OR_OP; 1.160 + } 1.161 + else if (nsGkAtoms::mod->Equals(op)) { 1.162 + defType = Token::MODULUS_OP; 1.163 + } 1.164 + else if (nsGkAtoms::div->Equals(op)) { 1.165 + defType = Token::DIVIDE_OP; 1.166 + } 1.167 + else { 1.168 + // XXX QUESTION: spec is not too precise 1.169 + // badops is sure an error, but is bad:ops, too? We say yes! 1.170 + return NS_ERROR_XPATH_OPERATOR_EXPECTED; 1.171 + } 1.172 + } 1.173 + newToken = new Token(start, mPosition, defType); 1.174 + } 1.175 + else if (isXPathDigit(*mPosition)) { 1.176 + start = mPosition; 1.177 + while (++mPosition < end && isXPathDigit(*mPosition)) { 1.178 + /* just go */ 1.179 + } 1.180 + if (mPosition < end && *mPosition == '.') { 1.181 + while (++mPosition < end && isXPathDigit(*mPosition)) { 1.182 + /* just go */ 1.183 + } 1.184 + } 1.185 + newToken = new Token(start, mPosition, Token::NUMBER); 1.186 + } 1.187 + else { 1.188 + switch (*mPosition) { 1.189 + //-- ignore whitespace 1.190 + case SPACE: 1.191 + case TX_TAB: 1.192 + case TX_CR: 1.193 + case TX_LF: 1.194 + ++mPosition; 1.195 + isToken = false; 1.196 + break; 1.197 + case S_QUOTE : 1.198 + case D_QUOTE : 1.199 + start = mPosition; 1.200 + while (++mPosition < end && *mPosition != *start) { 1.201 + // eat literal 1.202 + } 1.203 + if (mPosition == end) { 1.204 + mPosition = start; 1.205 + return NS_ERROR_XPATH_UNCLOSED_LITERAL; 1.206 + } 1.207 + newToken = new Token(start + 1, mPosition, Token::LITERAL); 1.208 + ++mPosition; 1.209 + break; 1.210 + case PERIOD: 1.211 + // period can be .., .(DIGITS)+ or ., check next 1.212 + if (++mPosition == end) { 1.213 + newToken = new Token(mPosition - 1, Token::SELF_NODE); 1.214 + } 1.215 + else if (isXPathDigit(*mPosition)) { 1.216 + start = mPosition - 1; 1.217 + while (++mPosition < end && isXPathDigit(*mPosition)) { 1.218 + /* just go */ 1.219 + } 1.220 + newToken = new Token(start, mPosition, Token::NUMBER); 1.221 + } 1.222 + else if (*mPosition == PERIOD) { 1.223 + ++mPosition; 1.224 + newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE); 1.225 + } 1.226 + else { 1.227 + newToken = new Token(mPosition - 1, Token::SELF_NODE); 1.228 + } 1.229 + break; 1.230 + case COLON: // QNames are dealt above, must be axis ident 1.231 + if (++mPosition >= end || *mPosition != COLON || 1.232 + prevToken->mType != Token::CNAME) { 1.233 + return NS_ERROR_XPATH_BAD_COLON; 1.234 + } 1.235 + prevToken->mType = Token::AXIS_IDENTIFIER; 1.236 + ++mPosition; 1.237 + isToken = false; 1.238 + break; 1.239 + case FORWARD_SLASH : 1.240 + if (++mPosition < end && *mPosition == FORWARD_SLASH) { 1.241 + ++mPosition; 1.242 + newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP); 1.243 + } 1.244 + else { 1.245 + newToken = new Token(mPosition - 1, Token::PARENT_OP); 1.246 + } 1.247 + break; 1.248 + case BANG : // can only be != 1.249 + if (++mPosition < end && *mPosition == EQUAL) { 1.250 + ++mPosition; 1.251 + newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP); 1.252 + break; 1.253 + } 1.254 + // Error ! is not not() 1.255 + return NS_ERROR_XPATH_BAD_BANG; 1.256 + case EQUAL: 1.257 + newToken = new Token(mPosition, Token::EQUAL_OP); 1.258 + ++mPosition; 1.259 + break; 1.260 + case L_ANGLE: 1.261 + if (++mPosition == end) { 1.262 + return NS_ERROR_XPATH_UNEXPECTED_END; 1.263 + } 1.264 + if (*mPosition == EQUAL) { 1.265 + ++mPosition; 1.266 + newToken = new Token(mPosition - 2, mPosition, 1.267 + Token::LESS_OR_EQUAL_OP); 1.268 + } 1.269 + else { 1.270 + newToken = new Token(mPosition - 1, Token::LESS_THAN_OP); 1.271 + } 1.272 + break; 1.273 + case R_ANGLE: 1.274 + if (++mPosition == end) { 1.275 + return NS_ERROR_XPATH_UNEXPECTED_END; 1.276 + } 1.277 + if (*mPosition == EQUAL) { 1.278 + ++mPosition; 1.279 + newToken = new Token(mPosition - 2, mPosition, 1.280 + Token::GREATER_OR_EQUAL_OP); 1.281 + } 1.282 + else { 1.283 + newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP); 1.284 + } 1.285 + break; 1.286 + case HYPHEN : 1.287 + newToken = new Token(mPosition, Token::SUBTRACTION_OP); 1.288 + ++mPosition; 1.289 + break; 1.290 + case ASTERIX: 1.291 + if (nextIsOperatorToken(prevToken)) { 1.292 + newToken = new Token(mPosition, Token::MULTIPLY_OP); 1.293 + } 1.294 + else { 1.295 + newToken = new Token(mPosition, Token::CNAME); 1.296 + } 1.297 + ++mPosition; 1.298 + break; 1.299 + case L_PAREN: 1.300 + if (prevToken->mType == Token::CNAME) { 1.301 + const nsDependentSubstring& val = prevToken->Value(); 1.302 + if (val.EqualsLiteral("comment")) { 1.303 + prevToken->mType = Token::COMMENT_AND_PAREN; 1.304 + } 1.305 + else if (val.EqualsLiteral("node")) { 1.306 + prevToken->mType = Token::NODE_AND_PAREN; 1.307 + } 1.308 + else if (val.EqualsLiteral("processing-instruction")) { 1.309 + prevToken->mType = Token::PROC_INST_AND_PAREN; 1.310 + } 1.311 + else if (val.EqualsLiteral("text")) { 1.312 + prevToken->mType = Token::TEXT_AND_PAREN; 1.313 + } 1.314 + else { 1.315 + prevToken->mType = Token::FUNCTION_NAME_AND_PAREN; 1.316 + } 1.317 + isToken = false; 1.318 + } 1.319 + else { 1.320 + newToken = new Token(mPosition, Token::L_PAREN); 1.321 + } 1.322 + ++mPosition; 1.323 + break; 1.324 + case R_PAREN: 1.325 + newToken = new Token(mPosition, Token::R_PAREN); 1.326 + ++mPosition; 1.327 + break; 1.328 + case L_BRACKET: 1.329 + newToken = new Token(mPosition, Token::L_BRACKET); 1.330 + ++mPosition; 1.331 + break; 1.332 + case R_BRACKET: 1.333 + newToken = new Token(mPosition, Token::R_BRACKET); 1.334 + ++mPosition; 1.335 + break; 1.336 + case COMMA: 1.337 + newToken = new Token(mPosition, Token::COMMA); 1.338 + ++mPosition; 1.339 + break; 1.340 + case AT_SIGN : 1.341 + newToken = new Token(mPosition, Token::AT_SIGN); 1.342 + ++mPosition; 1.343 + break; 1.344 + case PLUS: 1.345 + newToken = new Token(mPosition, Token::ADDITION_OP); 1.346 + ++mPosition; 1.347 + break; 1.348 + case VERT_BAR: 1.349 + newToken = new Token(mPosition, Token::UNION_OP); 1.350 + ++mPosition; 1.351 + break; 1.352 + default: 1.353 + // Error, don't grok character :-( 1.354 + return NS_ERROR_XPATH_ILLEGAL_CHAR; 1.355 + } 1.356 + } 1.357 + if (isToken) { 1.358 + NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY); 1.359 + NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE); 1.360 + prevToken = newToken; 1.361 + addToken(newToken); 1.362 + } 1.363 + } 1.364 + 1.365 + // add a endToken to the list 1.366 + newToken = new Token(end, end, Token::END); 1.367 + if (!newToken) { 1.368 + return NS_ERROR_OUT_OF_MEMORY; 1.369 + } 1.370 + addToken(newToken); 1.371 + 1.372 + return NS_OK; 1.373 +}