michael@0: /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: michael@0: #ifndef MITREXSL_EXPRLEXER_H michael@0: #define MITREXSL_EXPRLEXER_H michael@0: michael@0: #include "txCore.h" michael@0: #include "nsString.h" michael@0: michael@0: /** michael@0: * A Token class for the ExprLexer. michael@0: * michael@0: * This class was ported from XSL:P, an open source Java based michael@0: * XSLT processor, written by yours truly. michael@0: */ michael@0: class Token michael@0: { michael@0: public: michael@0: michael@0: /** michael@0: * Token types michael@0: */ michael@0: enum Type { michael@0: //-- Trivial Tokens michael@0: NULL_TOKEN = 1, michael@0: LITERAL, michael@0: NUMBER, michael@0: CNAME, michael@0: VAR_REFERENCE, michael@0: PARENT_NODE, michael@0: SELF_NODE, michael@0: R_PAREN, michael@0: R_BRACKET, // 9 michael@0: /** michael@0: * start of tokens for 3.7, bullet 1 michael@0: * ExprLexer::nextIsOperatorToken bails if the tokens aren't michael@0: * consecutive. michael@0: */ michael@0: COMMA, michael@0: AT_SIGN, michael@0: L_PAREN, michael@0: L_BRACKET, michael@0: AXIS_IDENTIFIER, michael@0: michael@0: // These tokens include their following left parenthesis michael@0: FUNCTION_NAME_AND_PAREN, // 15 michael@0: COMMENT_AND_PAREN, michael@0: NODE_AND_PAREN, michael@0: PROC_INST_AND_PAREN, michael@0: TEXT_AND_PAREN, michael@0: michael@0: /** michael@0: * operators michael@0: */ michael@0: //-- boolean ops michael@0: AND_OP, // 20 michael@0: OR_OP, michael@0: michael@0: //-- relational michael@0: EQUAL_OP, // 22 michael@0: NOT_EQUAL_OP, michael@0: LESS_THAN_OP, michael@0: GREATER_THAN_OP, michael@0: LESS_OR_EQUAL_OP, michael@0: GREATER_OR_EQUAL_OP, michael@0: //-- additive operators michael@0: ADDITION_OP, // 28 michael@0: SUBTRACTION_OP, michael@0: //-- multiplicative michael@0: DIVIDE_OP, // 30 michael@0: MULTIPLY_OP, michael@0: MODULUS_OP, michael@0: //-- path operators michael@0: PARENT_OP, // 33 michael@0: ANCESTOR_OP, michael@0: UNION_OP, michael@0: /** michael@0: * end of tokens for 3.7, bullet 1 -/ michael@0: */ michael@0: //-- Special endtoken michael@0: END // 36 michael@0: }; michael@0: michael@0: michael@0: /** michael@0: * Constructors michael@0: */ michael@0: typedef nsASingleFragmentString::const_char_iterator iterator; michael@0: michael@0: Token(iterator aStart, iterator aEnd, Type aType) michael@0: : mStart(aStart), michael@0: mEnd(aEnd), michael@0: mType(aType), michael@0: mNext(nullptr) michael@0: { michael@0: } michael@0: Token(iterator aChar, Type aType) michael@0: : mStart(aChar), michael@0: mEnd(aChar + 1), michael@0: mType(aType), michael@0: mNext(nullptr) michael@0: { michael@0: } michael@0: michael@0: const nsDependentSubstring Value() michael@0: { michael@0: return Substring(mStart, mEnd); michael@0: } michael@0: michael@0: iterator mStart, mEnd; michael@0: Type mType; michael@0: Token* mNext; michael@0: }; michael@0: michael@0: /** michael@0: * A class for splitting an "Expr" String into tokens and michael@0: * performing basic Lexical Analysis. michael@0: * michael@0: * This class was ported from XSL:P, an open source Java based XSL processor michael@0: */ michael@0: michael@0: class txExprLexer michael@0: { michael@0: public: michael@0: michael@0: txExprLexer(); michael@0: ~txExprLexer(); michael@0: michael@0: /** michael@0: * Parse the given string. michael@0: * returns an error result if lexing failed. michael@0: * The given string must outlive the use of the lexer, as the michael@0: * generated Tokens point to Substrings of it. michael@0: * mPosition points to the offending location in case of an error. michael@0: */ michael@0: nsresult parse(const nsASingleFragmentString& aPattern); michael@0: michael@0: typedef nsASingleFragmentString::const_char_iterator iterator; michael@0: iterator mPosition; michael@0: michael@0: /** michael@0: * Functions for iterating over the TokenList michael@0: */ michael@0: michael@0: Token* nextToken(); michael@0: Token* peek() michael@0: { michael@0: NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer"); michael@0: return mCurrentItem; michael@0: } michael@0: Token* peekAhead() michael@0: { michael@0: NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer"); michael@0: // Don't peek past the end node michael@0: return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext : mCurrentItem; michael@0: } michael@0: bool hasMoreTokens() michael@0: { michael@0: NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer"); michael@0: return (mCurrentItem && mCurrentItem->mType != Token::END); michael@0: } michael@0: michael@0: /** michael@0: * Trivial Tokens michael@0: */ michael@0: //-- LF, changed to enum michael@0: enum _TrivialTokens { michael@0: D_QUOTE = '\"', michael@0: S_QUOTE = '\'', michael@0: L_PAREN = '(', michael@0: R_PAREN = ')', michael@0: L_BRACKET = '[', michael@0: R_BRACKET = ']', michael@0: L_ANGLE = '<', michael@0: R_ANGLE = '>', michael@0: COMMA = ',', michael@0: PERIOD = '.', michael@0: ASTERIX = '*', michael@0: FORWARD_SLASH = '/', michael@0: EQUAL = '=', michael@0: BANG = '!', michael@0: VERT_BAR = '|', michael@0: AT_SIGN = '@', michael@0: DOLLAR_SIGN = '$', michael@0: PLUS = '+', michael@0: HYPHEN = '-', michael@0: COLON = ':', michael@0: //-- whitespace tokens michael@0: SPACE = ' ', michael@0: TX_TAB = '\t', michael@0: TX_CR = '\n', michael@0: TX_LF = '\r' michael@0: }; michael@0: michael@0: private: michael@0: michael@0: Token* mCurrentItem; michael@0: Token* mFirstItem; michael@0: Token* mLastItem; michael@0: michael@0: int mTokenCount; michael@0: michael@0: void addToken(Token* aToken); michael@0: michael@0: /** michael@0: * Returns true if the following Token should be an operator. michael@0: * This is a helper for the first bullet of [XPath 3.7] michael@0: * Lexical Structure michael@0: */ michael@0: bool nextIsOperatorToken(Token* aToken); michael@0: michael@0: /** michael@0: * Returns true if the given character represents a numeric letter (digit) michael@0: * Implemented in ExprLexerChars.cpp michael@0: */ michael@0: static bool isXPathDigit(char16_t ch) michael@0: { michael@0: return (ch >= '0' && ch <= '9'); michael@0: } michael@0: }; michael@0: michael@0: #endif michael@0: