michael@0: /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /** michael@0: * Lexical analyzer for XPath expressions michael@0: */ michael@0: michael@0: #include "txExprLexer.h" michael@0: #include "nsGkAtoms.h" michael@0: #include "nsString.h" michael@0: #include "nsError.h" michael@0: #include "txXMLUtils.h" michael@0: michael@0: /** michael@0: * Creates a new ExprLexer michael@0: */ michael@0: txExprLexer::txExprLexer() michael@0: : mCurrentItem(nullptr), michael@0: mFirstItem(nullptr), michael@0: mLastItem(nullptr), michael@0: mTokenCount(0) michael@0: { michael@0: } michael@0: michael@0: /** michael@0: * Destroys this instance of an txExprLexer michael@0: */ michael@0: txExprLexer::~txExprLexer() michael@0: { michael@0: //-- delete tokens michael@0: Token* tok = mFirstItem; michael@0: while (tok) { michael@0: Token* temp = tok->mNext; michael@0: delete tok; michael@0: tok = temp; michael@0: } michael@0: mCurrentItem = nullptr; michael@0: } michael@0: michael@0: Token* michael@0: txExprLexer::nextToken() michael@0: { michael@0: if (!mCurrentItem) { michael@0: NS_NOTREACHED("nextToken called on uninitialized lexer"); michael@0: return nullptr; michael@0: } michael@0: michael@0: if (mCurrentItem->mType == Token::END) { michael@0: // Do not progress beyond the end token michael@0: return mCurrentItem; michael@0: } michael@0: michael@0: Token* token = mCurrentItem; michael@0: mCurrentItem = mCurrentItem->mNext; michael@0: return token; michael@0: } michael@0: michael@0: void michael@0: txExprLexer::addToken(Token* aToken) michael@0: { michael@0: if (mLastItem) { michael@0: mLastItem->mNext = aToken; michael@0: } michael@0: if (!mFirstItem) { michael@0: mFirstItem = aToken; michael@0: mCurrentItem = aToken; michael@0: } michael@0: mLastItem = aToken; michael@0: ++mTokenCount; michael@0: } michael@0: michael@0: /** michael@0: * Returns true if the following Token should be an operator. michael@0: * This is a helper for the first bullet of [XPath 3.7] michael@0: * Lexical Structure michael@0: */ michael@0: bool michael@0: txExprLexer::nextIsOperatorToken(Token* aToken) michael@0: { michael@0: if (!aToken || aToken->mType == Token::NULL_TOKEN) { michael@0: return false; michael@0: } michael@0: /* This relies on the tokens having the right order in txExprLexer.h */ michael@0: return aToken->mType < Token::COMMA || michael@0: aToken->mType > Token::UNION_OP; michael@0: michael@0: } michael@0: michael@0: /** michael@0: * Parses the given string into a sequence of Tokens michael@0: */ michael@0: nsresult michael@0: txExprLexer::parse(const nsASingleFragmentString& aPattern) michael@0: { michael@0: iterator start, end; michael@0: start = aPattern.BeginReading(mPosition); michael@0: aPattern.EndReading(end); michael@0: michael@0: //-- initialize previous token, this will automatically get michael@0: //-- deleted when it goes out of scope michael@0: Token nullToken(nullptr, nullptr, Token::NULL_TOKEN); michael@0: michael@0: Token::Type defType; michael@0: Token* newToken = nullptr; michael@0: Token* prevToken = &nullToken; michael@0: bool isToken; michael@0: michael@0: while (mPosition < end) { michael@0: michael@0: defType = Token::CNAME; michael@0: isToken = true; michael@0: michael@0: if (*mPosition == DOLLAR_SIGN) { michael@0: if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) { michael@0: return NS_ERROR_XPATH_INVALID_VAR_NAME; michael@0: } michael@0: defType = Token::VAR_REFERENCE; michael@0: } michael@0: // just reuse the QName parsing, which will use defType michael@0: // the token to construct michael@0: michael@0: if (XMLUtils::isLetter(*mPosition)) { michael@0: // NCName, can get QName or OperatorName; michael@0: // FunctionName, NodeName, and AxisSpecifier may want whitespace, michael@0: // and are dealt with below michael@0: start = mPosition; michael@0: while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { michael@0: /* just go */ michael@0: } michael@0: if (mPosition < end && *mPosition == COLON) { michael@0: // try QName or wildcard, might need to step back for axis michael@0: if (++mPosition == end) { michael@0: return NS_ERROR_XPATH_UNEXPECTED_END; michael@0: } michael@0: if (XMLUtils::isLetter(*mPosition)) { michael@0: while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) { michael@0: /* just go */ michael@0: } michael@0: } michael@0: else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) { michael@0: // eat wildcard for NameTest, bail for var ref at COLON michael@0: ++mPosition; michael@0: } michael@0: else { michael@0: --mPosition; // step back michael@0: } michael@0: } michael@0: if (nextIsOperatorToken(prevToken)) { michael@0: nsDependentSubstring op(Substring(start, mPosition)); michael@0: if (nsGkAtoms::_and->Equals(op)) { michael@0: defType = Token::AND_OP; michael@0: } michael@0: else if (nsGkAtoms::_or->Equals(op)) { michael@0: defType = Token::OR_OP; michael@0: } michael@0: else if (nsGkAtoms::mod->Equals(op)) { michael@0: defType = Token::MODULUS_OP; michael@0: } michael@0: else if (nsGkAtoms::div->Equals(op)) { michael@0: defType = Token::DIVIDE_OP; michael@0: } michael@0: else { michael@0: // XXX QUESTION: spec is not too precise michael@0: // badops is sure an error, but is bad:ops, too? We say yes! michael@0: return NS_ERROR_XPATH_OPERATOR_EXPECTED; michael@0: } michael@0: } michael@0: newToken = new Token(start, mPosition, defType); michael@0: } michael@0: else if (isXPathDigit(*mPosition)) { michael@0: start = mPosition; michael@0: while (++mPosition < end && isXPathDigit(*mPosition)) { michael@0: /* just go */ michael@0: } michael@0: if (mPosition < end && *mPosition == '.') { michael@0: while (++mPosition < end && isXPathDigit(*mPosition)) { michael@0: /* just go */ michael@0: } michael@0: } michael@0: newToken = new Token(start, mPosition, Token::NUMBER); michael@0: } michael@0: else { michael@0: switch (*mPosition) { michael@0: //-- ignore whitespace michael@0: case SPACE: michael@0: case TX_TAB: michael@0: case TX_CR: michael@0: case TX_LF: michael@0: ++mPosition; michael@0: isToken = false; michael@0: break; michael@0: case S_QUOTE : michael@0: case D_QUOTE : michael@0: start = mPosition; michael@0: while (++mPosition < end && *mPosition != *start) { michael@0: // eat literal michael@0: } michael@0: if (mPosition == end) { michael@0: mPosition = start; michael@0: return NS_ERROR_XPATH_UNCLOSED_LITERAL; michael@0: } michael@0: newToken = new Token(start + 1, mPosition, Token::LITERAL); michael@0: ++mPosition; michael@0: break; michael@0: case PERIOD: michael@0: // period can be .., .(DIGITS)+ or ., check next michael@0: if (++mPosition == end) { michael@0: newToken = new Token(mPosition - 1, Token::SELF_NODE); michael@0: } michael@0: else if (isXPathDigit(*mPosition)) { michael@0: start = mPosition - 1; michael@0: while (++mPosition < end && isXPathDigit(*mPosition)) { michael@0: /* just go */ michael@0: } michael@0: newToken = new Token(start, mPosition, Token::NUMBER); michael@0: } michael@0: else if (*mPosition == PERIOD) { michael@0: ++mPosition; michael@0: newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE); michael@0: } michael@0: else { michael@0: newToken = new Token(mPosition - 1, Token::SELF_NODE); michael@0: } michael@0: break; michael@0: case COLON: // QNames are dealt above, must be axis ident michael@0: if (++mPosition >= end || *mPosition != COLON || michael@0: prevToken->mType != Token::CNAME) { michael@0: return NS_ERROR_XPATH_BAD_COLON; michael@0: } michael@0: prevToken->mType = Token::AXIS_IDENTIFIER; michael@0: ++mPosition; michael@0: isToken = false; michael@0: break; michael@0: case FORWARD_SLASH : michael@0: if (++mPosition < end && *mPosition == FORWARD_SLASH) { michael@0: ++mPosition; michael@0: newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP); michael@0: } michael@0: else { michael@0: newToken = new Token(mPosition - 1, Token::PARENT_OP); michael@0: } michael@0: break; michael@0: case BANG : // can only be != michael@0: if (++mPosition < end && *mPosition == EQUAL) { michael@0: ++mPosition; michael@0: newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP); michael@0: break; michael@0: } michael@0: // Error ! is not not() michael@0: return NS_ERROR_XPATH_BAD_BANG; michael@0: case EQUAL: michael@0: newToken = new Token(mPosition, Token::EQUAL_OP); michael@0: ++mPosition; michael@0: break; michael@0: case L_ANGLE: michael@0: if (++mPosition == end) { michael@0: return NS_ERROR_XPATH_UNEXPECTED_END; michael@0: } michael@0: if (*mPosition == EQUAL) { michael@0: ++mPosition; michael@0: newToken = new Token(mPosition - 2, mPosition, michael@0: Token::LESS_OR_EQUAL_OP); michael@0: } michael@0: else { michael@0: newToken = new Token(mPosition - 1, Token::LESS_THAN_OP); michael@0: } michael@0: break; michael@0: case R_ANGLE: michael@0: if (++mPosition == end) { michael@0: return NS_ERROR_XPATH_UNEXPECTED_END; michael@0: } michael@0: if (*mPosition == EQUAL) { michael@0: ++mPosition; michael@0: newToken = new Token(mPosition - 2, mPosition, michael@0: Token::GREATER_OR_EQUAL_OP); michael@0: } michael@0: else { michael@0: newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP); michael@0: } michael@0: break; michael@0: case HYPHEN : michael@0: newToken = new Token(mPosition, Token::SUBTRACTION_OP); michael@0: ++mPosition; michael@0: break; michael@0: case ASTERIX: michael@0: if (nextIsOperatorToken(prevToken)) { michael@0: newToken = new Token(mPosition, Token::MULTIPLY_OP); michael@0: } michael@0: else { michael@0: newToken = new Token(mPosition, Token::CNAME); michael@0: } michael@0: ++mPosition; michael@0: break; michael@0: case L_PAREN: michael@0: if (prevToken->mType == Token::CNAME) { michael@0: const nsDependentSubstring& val = prevToken->Value(); michael@0: if (val.EqualsLiteral("comment")) { michael@0: prevToken->mType = Token::COMMENT_AND_PAREN; michael@0: } michael@0: else if (val.EqualsLiteral("node")) { michael@0: prevToken->mType = Token::NODE_AND_PAREN; michael@0: } michael@0: else if (val.EqualsLiteral("processing-instruction")) { michael@0: prevToken->mType = Token::PROC_INST_AND_PAREN; michael@0: } michael@0: else if (val.EqualsLiteral("text")) { michael@0: prevToken->mType = Token::TEXT_AND_PAREN; michael@0: } michael@0: else { michael@0: prevToken->mType = Token::FUNCTION_NAME_AND_PAREN; michael@0: } michael@0: isToken = false; michael@0: } michael@0: else { michael@0: newToken = new Token(mPosition, Token::L_PAREN); michael@0: } michael@0: ++mPosition; michael@0: break; michael@0: case R_PAREN: michael@0: newToken = new Token(mPosition, Token::R_PAREN); michael@0: ++mPosition; michael@0: break; michael@0: case L_BRACKET: michael@0: newToken = new Token(mPosition, Token::L_BRACKET); michael@0: ++mPosition; michael@0: break; michael@0: case R_BRACKET: michael@0: newToken = new Token(mPosition, Token::R_BRACKET); michael@0: ++mPosition; michael@0: break; michael@0: case COMMA: michael@0: newToken = new Token(mPosition, Token::COMMA); michael@0: ++mPosition; michael@0: break; michael@0: case AT_SIGN : michael@0: newToken = new Token(mPosition, Token::AT_SIGN); michael@0: ++mPosition; michael@0: break; michael@0: case PLUS: michael@0: newToken = new Token(mPosition, Token::ADDITION_OP); michael@0: ++mPosition; michael@0: break; michael@0: case VERT_BAR: michael@0: newToken = new Token(mPosition, Token::UNION_OP); michael@0: ++mPosition; michael@0: break; michael@0: default: michael@0: // Error, don't grok character :-( michael@0: return NS_ERROR_XPATH_ILLEGAL_CHAR; michael@0: } michael@0: } michael@0: if (isToken) { michael@0: NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY); michael@0: NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE); michael@0: prevToken = newToken; michael@0: addToken(newToken); michael@0: } michael@0: } michael@0: michael@0: // add a endToken to the list michael@0: newToken = new Token(end, end, Token::END); michael@0: if (!newToken) { michael@0: return NS_ERROR_OUT_OF_MEMORY; michael@0: } michael@0: addToken(newToken); michael@0: michael@0: return NS_OK; michael@0: }