1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/dom/xslt/xpath/txExprLexer.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,226 @@ 1.4 +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 + 1.10 +#ifndef MITREXSL_EXPRLEXER_H 1.11 +#define MITREXSL_EXPRLEXER_H 1.12 + 1.13 +#include "txCore.h" 1.14 +#include "nsString.h" 1.15 + 1.16 +/** 1.17 + * A Token class for the ExprLexer. 1.18 + * 1.19 + * This class was ported from XSL:P, an open source Java based 1.20 + * XSLT processor, written by yours truly. 1.21 + */ 1.22 +class Token 1.23 +{ 1.24 +public: 1.25 + 1.26 + /** 1.27 + * Token types 1.28 + */ 1.29 + enum Type { 1.30 + //-- Trivial Tokens 1.31 + NULL_TOKEN = 1, 1.32 + LITERAL, 1.33 + NUMBER, 1.34 + CNAME, 1.35 + VAR_REFERENCE, 1.36 + PARENT_NODE, 1.37 + SELF_NODE, 1.38 + R_PAREN, 1.39 + R_BRACKET, // 9 1.40 + /** 1.41 + * start of tokens for 3.7, bullet 1 1.42 + * ExprLexer::nextIsOperatorToken bails if the tokens aren't 1.43 + * consecutive. 1.44 + */ 1.45 + COMMA, 1.46 + AT_SIGN, 1.47 + L_PAREN, 1.48 + L_BRACKET, 1.49 + AXIS_IDENTIFIER, 1.50 + 1.51 + // These tokens include their following left parenthesis 1.52 + FUNCTION_NAME_AND_PAREN, // 15 1.53 + COMMENT_AND_PAREN, 1.54 + NODE_AND_PAREN, 1.55 + PROC_INST_AND_PAREN, 1.56 + TEXT_AND_PAREN, 1.57 + 1.58 + /** 1.59 + * operators 1.60 + */ 1.61 + //-- boolean ops 1.62 + AND_OP, // 20 1.63 + OR_OP, 1.64 + 1.65 + //-- relational 1.66 + EQUAL_OP, // 22 1.67 + NOT_EQUAL_OP, 1.68 + LESS_THAN_OP, 1.69 + GREATER_THAN_OP, 1.70 + LESS_OR_EQUAL_OP, 1.71 + GREATER_OR_EQUAL_OP, 1.72 + //-- additive operators 1.73 + ADDITION_OP, // 28 1.74 + SUBTRACTION_OP, 1.75 + //-- multiplicative 1.76 + DIVIDE_OP, // 30 1.77 + MULTIPLY_OP, 1.78 + MODULUS_OP, 1.79 + //-- path operators 1.80 + PARENT_OP, // 33 1.81 + ANCESTOR_OP, 1.82 + UNION_OP, 1.83 + /** 1.84 + * end of tokens for 3.7, bullet 1 -/ 1.85 + */ 1.86 + //-- Special endtoken 1.87 + END // 36 1.88 + }; 1.89 + 1.90 + 1.91 + /** 1.92 + * Constructors 1.93 + */ 1.94 + typedef nsASingleFragmentString::const_char_iterator iterator; 1.95 + 1.96 + Token(iterator aStart, iterator aEnd, Type aType) 1.97 + : mStart(aStart), 1.98 + mEnd(aEnd), 1.99 + mType(aType), 1.100 + mNext(nullptr) 1.101 + { 1.102 + } 1.103 + Token(iterator aChar, Type aType) 1.104 + : mStart(aChar), 1.105 + mEnd(aChar + 1), 1.106 + mType(aType), 1.107 + mNext(nullptr) 1.108 + { 1.109 + } 1.110 + 1.111 + const nsDependentSubstring Value() 1.112 + { 1.113 + return Substring(mStart, mEnd); 1.114 + } 1.115 + 1.116 + iterator mStart, mEnd; 1.117 + Type mType; 1.118 + Token* mNext; 1.119 +}; 1.120 + 1.121 +/** 1.122 + * A class for splitting an "Expr" String into tokens and 1.123 + * performing basic Lexical Analysis. 1.124 + * 1.125 + * This class was ported from XSL:P, an open source Java based XSL processor 1.126 + */ 1.127 + 1.128 +class txExprLexer 1.129 +{ 1.130 +public: 1.131 + 1.132 + txExprLexer(); 1.133 + ~txExprLexer(); 1.134 + 1.135 + /** 1.136 + * Parse the given string. 1.137 + * returns an error result if lexing failed. 1.138 + * The given string must outlive the use of the lexer, as the 1.139 + * generated Tokens point to Substrings of it. 1.140 + * mPosition points to the offending location in case of an error. 1.141 + */ 1.142 + nsresult parse(const nsASingleFragmentString& aPattern); 1.143 + 1.144 + typedef nsASingleFragmentString::const_char_iterator iterator; 1.145 + iterator mPosition; 1.146 + 1.147 + /** 1.148 + * Functions for iterating over the TokenList 1.149 + */ 1.150 + 1.151 + Token* nextToken(); 1.152 + Token* peek() 1.153 + { 1.154 + NS_ASSERTION(mCurrentItem, "peek called uninitialized lexer"); 1.155 + return mCurrentItem; 1.156 + } 1.157 + Token* peekAhead() 1.158 + { 1.159 + NS_ASSERTION(mCurrentItem, "peekAhead called on uninitialized lexer"); 1.160 + // Don't peek past the end node 1.161 + return (mCurrentItem && mCurrentItem->mNext) ? mCurrentItem->mNext : mCurrentItem; 1.162 + } 1.163 + bool hasMoreTokens() 1.164 + { 1.165 + NS_ASSERTION(mCurrentItem, "HasMoreTokens called on uninitialized lexer"); 1.166 + return (mCurrentItem && mCurrentItem->mType != Token::END); 1.167 + } 1.168 + 1.169 + /** 1.170 + * Trivial Tokens 1.171 + */ 1.172 + //-- LF, changed to enum 1.173 + enum _TrivialTokens { 1.174 + D_QUOTE = '\"', 1.175 + S_QUOTE = '\'', 1.176 + L_PAREN = '(', 1.177 + R_PAREN = ')', 1.178 + L_BRACKET = '[', 1.179 + R_BRACKET = ']', 1.180 + L_ANGLE = '<', 1.181 + R_ANGLE = '>', 1.182 + COMMA = ',', 1.183 + PERIOD = '.', 1.184 + ASTERIX = '*', 1.185 + FORWARD_SLASH = '/', 1.186 + EQUAL = '=', 1.187 + BANG = '!', 1.188 + VERT_BAR = '|', 1.189 + AT_SIGN = '@', 1.190 + DOLLAR_SIGN = '$', 1.191 + PLUS = '+', 1.192 + HYPHEN = '-', 1.193 + COLON = ':', 1.194 + //-- whitespace tokens 1.195 + SPACE = ' ', 1.196 + TX_TAB = '\t', 1.197 + TX_CR = '\n', 1.198 + TX_LF = '\r' 1.199 + }; 1.200 + 1.201 +private: 1.202 + 1.203 + Token* mCurrentItem; 1.204 + Token* mFirstItem; 1.205 + Token* mLastItem; 1.206 + 1.207 + int mTokenCount; 1.208 + 1.209 + void addToken(Token* aToken); 1.210 + 1.211 + /** 1.212 + * Returns true if the following Token should be an operator. 1.213 + * This is a helper for the first bullet of [XPath 3.7] 1.214 + * Lexical Structure 1.215 + */ 1.216 + bool nextIsOperatorToken(Token* aToken); 1.217 + 1.218 + /** 1.219 + * Returns true if the given character represents a numeric letter (digit) 1.220 + * Implemented in ExprLexerChars.cpp 1.221 + */ 1.222 + static bool isXPathDigit(char16_t ch) 1.223 + { 1.224 + return (ch >= '0' && ch <= '9'); 1.225 + } 1.226 +}; 1.227 + 1.228 +#endif 1.229 +