dom/xslt/xpath/txExprLexer.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 /**
michael@0 7 * Lexical analyzer for XPath expressions
michael@0 8 */
michael@0 9
michael@0 10 #include "txExprLexer.h"
michael@0 11 #include "nsGkAtoms.h"
michael@0 12 #include "nsString.h"
michael@0 13 #include "nsError.h"
michael@0 14 #include "txXMLUtils.h"
michael@0 15
michael@0 16 /**
michael@0 17 * Creates a new ExprLexer
michael@0 18 */
michael@0 19 txExprLexer::txExprLexer()
michael@0 20 : mCurrentItem(nullptr),
michael@0 21 mFirstItem(nullptr),
michael@0 22 mLastItem(nullptr),
michael@0 23 mTokenCount(0)
michael@0 24 {
michael@0 25 }
michael@0 26
michael@0 27 /**
michael@0 28 * Destroys this instance of an txExprLexer
michael@0 29 */
michael@0 30 txExprLexer::~txExprLexer()
michael@0 31 {
michael@0 32 //-- delete tokens
michael@0 33 Token* tok = mFirstItem;
michael@0 34 while (tok) {
michael@0 35 Token* temp = tok->mNext;
michael@0 36 delete tok;
michael@0 37 tok = temp;
michael@0 38 }
michael@0 39 mCurrentItem = nullptr;
michael@0 40 }
michael@0 41
michael@0 42 Token*
michael@0 43 txExprLexer::nextToken()
michael@0 44 {
michael@0 45 if (!mCurrentItem) {
michael@0 46 NS_NOTREACHED("nextToken called on uninitialized lexer");
michael@0 47 return nullptr;
michael@0 48 }
michael@0 49
michael@0 50 if (mCurrentItem->mType == Token::END) {
michael@0 51 // Do not progress beyond the end token
michael@0 52 return mCurrentItem;
michael@0 53 }
michael@0 54
michael@0 55 Token* token = mCurrentItem;
michael@0 56 mCurrentItem = mCurrentItem->mNext;
michael@0 57 return token;
michael@0 58 }
michael@0 59
michael@0 60 void
michael@0 61 txExprLexer::addToken(Token* aToken)
michael@0 62 {
michael@0 63 if (mLastItem) {
michael@0 64 mLastItem->mNext = aToken;
michael@0 65 }
michael@0 66 if (!mFirstItem) {
michael@0 67 mFirstItem = aToken;
michael@0 68 mCurrentItem = aToken;
michael@0 69 }
michael@0 70 mLastItem = aToken;
michael@0 71 ++mTokenCount;
michael@0 72 }
michael@0 73
michael@0 74 /**
michael@0 75 * Returns true if the following Token should be an operator.
michael@0 76 * This is a helper for the first bullet of [XPath 3.7]
michael@0 77 * Lexical Structure
michael@0 78 */
michael@0 79 bool
michael@0 80 txExprLexer::nextIsOperatorToken(Token* aToken)
michael@0 81 {
michael@0 82 if (!aToken || aToken->mType == Token::NULL_TOKEN) {
michael@0 83 return false;
michael@0 84 }
michael@0 85 /* This relies on the tokens having the right order in txExprLexer.h */
michael@0 86 return aToken->mType < Token::COMMA ||
michael@0 87 aToken->mType > Token::UNION_OP;
michael@0 88
michael@0 89 }
michael@0 90
michael@0 91 /**
michael@0 92 * Parses the given string into a sequence of Tokens
michael@0 93 */
michael@0 94 nsresult
michael@0 95 txExprLexer::parse(const nsASingleFragmentString& aPattern)
michael@0 96 {
michael@0 97 iterator start, end;
michael@0 98 start = aPattern.BeginReading(mPosition);
michael@0 99 aPattern.EndReading(end);
michael@0 100
michael@0 101 //-- initialize previous token, this will automatically get
michael@0 102 //-- deleted when it goes out of scope
michael@0 103 Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
michael@0 104
michael@0 105 Token::Type defType;
michael@0 106 Token* newToken = nullptr;
michael@0 107 Token* prevToken = &nullToken;
michael@0 108 bool isToken;
michael@0 109
michael@0 110 while (mPosition < end) {
michael@0 111
michael@0 112 defType = Token::CNAME;
michael@0 113 isToken = true;
michael@0 114
michael@0 115 if (*mPosition == DOLLAR_SIGN) {
michael@0 116 if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
michael@0 117 return NS_ERROR_XPATH_INVALID_VAR_NAME;
michael@0 118 }
michael@0 119 defType = Token::VAR_REFERENCE;
michael@0 120 }
michael@0 121 // just reuse the QName parsing, which will use defType
michael@0 122 // the token to construct
michael@0 123
michael@0 124 if (XMLUtils::isLetter(*mPosition)) {
michael@0 125 // NCName, can get QName or OperatorName;
michael@0 126 // FunctionName, NodeName, and AxisSpecifier may want whitespace,
michael@0 127 // and are dealt with below
michael@0 128 start = mPosition;
michael@0 129 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
michael@0 130 /* just go */
michael@0 131 }
michael@0 132 if (mPosition < end && *mPosition == COLON) {
michael@0 133 // try QName or wildcard, might need to step back for axis
michael@0 134 if (++mPosition == end) {
michael@0 135 return NS_ERROR_XPATH_UNEXPECTED_END;
michael@0 136 }
michael@0 137 if (XMLUtils::isLetter(*mPosition)) {
michael@0 138 while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
michael@0 139 /* just go */
michael@0 140 }
michael@0 141 }
michael@0 142 else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
michael@0 143 // eat wildcard for NameTest, bail for var ref at COLON
michael@0 144 ++mPosition;
michael@0 145 }
michael@0 146 else {
michael@0 147 --mPosition; // step back
michael@0 148 }
michael@0 149 }
michael@0 150 if (nextIsOperatorToken(prevToken)) {
michael@0 151 nsDependentSubstring op(Substring(start, mPosition));
michael@0 152 if (nsGkAtoms::_and->Equals(op)) {
michael@0 153 defType = Token::AND_OP;
michael@0 154 }
michael@0 155 else if (nsGkAtoms::_or->Equals(op)) {
michael@0 156 defType = Token::OR_OP;
michael@0 157 }
michael@0 158 else if (nsGkAtoms::mod->Equals(op)) {
michael@0 159 defType = Token::MODULUS_OP;
michael@0 160 }
michael@0 161 else if (nsGkAtoms::div->Equals(op)) {
michael@0 162 defType = Token::DIVIDE_OP;
michael@0 163 }
michael@0 164 else {
michael@0 165 // XXX QUESTION: spec is not too precise
michael@0 166 // badops is sure an error, but is bad:ops, too? We say yes!
michael@0 167 return NS_ERROR_XPATH_OPERATOR_EXPECTED;
michael@0 168 }
michael@0 169 }
michael@0 170 newToken = new Token(start, mPosition, defType);
michael@0 171 }
michael@0 172 else if (isXPathDigit(*mPosition)) {
michael@0 173 start = mPosition;
michael@0 174 while (++mPosition < end && isXPathDigit(*mPosition)) {
michael@0 175 /* just go */
michael@0 176 }
michael@0 177 if (mPosition < end && *mPosition == '.') {
michael@0 178 while (++mPosition < end && isXPathDigit(*mPosition)) {
michael@0 179 /* just go */
michael@0 180 }
michael@0 181 }
michael@0 182 newToken = new Token(start, mPosition, Token::NUMBER);
michael@0 183 }
michael@0 184 else {
michael@0 185 switch (*mPosition) {
michael@0 186 //-- ignore whitespace
michael@0 187 case SPACE:
michael@0 188 case TX_TAB:
michael@0 189 case TX_CR:
michael@0 190 case TX_LF:
michael@0 191 ++mPosition;
michael@0 192 isToken = false;
michael@0 193 break;
michael@0 194 case S_QUOTE :
michael@0 195 case D_QUOTE :
michael@0 196 start = mPosition;
michael@0 197 while (++mPosition < end && *mPosition != *start) {
michael@0 198 // eat literal
michael@0 199 }
michael@0 200 if (mPosition == end) {
michael@0 201 mPosition = start;
michael@0 202 return NS_ERROR_XPATH_UNCLOSED_LITERAL;
michael@0 203 }
michael@0 204 newToken = new Token(start + 1, mPosition, Token::LITERAL);
michael@0 205 ++mPosition;
michael@0 206 break;
michael@0 207 case PERIOD:
michael@0 208 // period can be .., .(DIGITS)+ or ., check next
michael@0 209 if (++mPosition == end) {
michael@0 210 newToken = new Token(mPosition - 1, Token::SELF_NODE);
michael@0 211 }
michael@0 212 else if (isXPathDigit(*mPosition)) {
michael@0 213 start = mPosition - 1;
michael@0 214 while (++mPosition < end && isXPathDigit(*mPosition)) {
michael@0 215 /* just go */
michael@0 216 }
michael@0 217 newToken = new Token(start, mPosition, Token::NUMBER);
michael@0 218 }
michael@0 219 else if (*mPosition == PERIOD) {
michael@0 220 ++mPosition;
michael@0 221 newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
michael@0 222 }
michael@0 223 else {
michael@0 224 newToken = new Token(mPosition - 1, Token::SELF_NODE);
michael@0 225 }
michael@0 226 break;
michael@0 227 case COLON: // QNames are dealt above, must be axis ident
michael@0 228 if (++mPosition >= end || *mPosition != COLON ||
michael@0 229 prevToken->mType != Token::CNAME) {
michael@0 230 return NS_ERROR_XPATH_BAD_COLON;
michael@0 231 }
michael@0 232 prevToken->mType = Token::AXIS_IDENTIFIER;
michael@0 233 ++mPosition;
michael@0 234 isToken = false;
michael@0 235 break;
michael@0 236 case FORWARD_SLASH :
michael@0 237 if (++mPosition < end && *mPosition == FORWARD_SLASH) {
michael@0 238 ++mPosition;
michael@0 239 newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
michael@0 240 }
michael@0 241 else {
michael@0 242 newToken = new Token(mPosition - 1, Token::PARENT_OP);
michael@0 243 }
michael@0 244 break;
michael@0 245 case BANG : // can only be !=
michael@0 246 if (++mPosition < end && *mPosition == EQUAL) {
michael@0 247 ++mPosition;
michael@0 248 newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
michael@0 249 break;
michael@0 250 }
michael@0 251 // Error ! is not not()
michael@0 252 return NS_ERROR_XPATH_BAD_BANG;
michael@0 253 case EQUAL:
michael@0 254 newToken = new Token(mPosition, Token::EQUAL_OP);
michael@0 255 ++mPosition;
michael@0 256 break;
michael@0 257 case L_ANGLE:
michael@0 258 if (++mPosition == end) {
michael@0 259 return NS_ERROR_XPATH_UNEXPECTED_END;
michael@0 260 }
michael@0 261 if (*mPosition == EQUAL) {
michael@0 262 ++mPosition;
michael@0 263 newToken = new Token(mPosition - 2, mPosition,
michael@0 264 Token::LESS_OR_EQUAL_OP);
michael@0 265 }
michael@0 266 else {
michael@0 267 newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
michael@0 268 }
michael@0 269 break;
michael@0 270 case R_ANGLE:
michael@0 271 if (++mPosition == end) {
michael@0 272 return NS_ERROR_XPATH_UNEXPECTED_END;
michael@0 273 }
michael@0 274 if (*mPosition == EQUAL) {
michael@0 275 ++mPosition;
michael@0 276 newToken = new Token(mPosition - 2, mPosition,
michael@0 277 Token::GREATER_OR_EQUAL_OP);
michael@0 278 }
michael@0 279 else {
michael@0 280 newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
michael@0 281 }
michael@0 282 break;
michael@0 283 case HYPHEN :
michael@0 284 newToken = new Token(mPosition, Token::SUBTRACTION_OP);
michael@0 285 ++mPosition;
michael@0 286 break;
michael@0 287 case ASTERIX:
michael@0 288 if (nextIsOperatorToken(prevToken)) {
michael@0 289 newToken = new Token(mPosition, Token::MULTIPLY_OP);
michael@0 290 }
michael@0 291 else {
michael@0 292 newToken = new Token(mPosition, Token::CNAME);
michael@0 293 }
michael@0 294 ++mPosition;
michael@0 295 break;
michael@0 296 case L_PAREN:
michael@0 297 if (prevToken->mType == Token::CNAME) {
michael@0 298 const nsDependentSubstring& val = prevToken->Value();
michael@0 299 if (val.EqualsLiteral("comment")) {
michael@0 300 prevToken->mType = Token::COMMENT_AND_PAREN;
michael@0 301 }
michael@0 302 else if (val.EqualsLiteral("node")) {
michael@0 303 prevToken->mType = Token::NODE_AND_PAREN;
michael@0 304 }
michael@0 305 else if (val.EqualsLiteral("processing-instruction")) {
michael@0 306 prevToken->mType = Token::PROC_INST_AND_PAREN;
michael@0 307 }
michael@0 308 else if (val.EqualsLiteral("text")) {
michael@0 309 prevToken->mType = Token::TEXT_AND_PAREN;
michael@0 310 }
michael@0 311 else {
michael@0 312 prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
michael@0 313 }
michael@0 314 isToken = false;
michael@0 315 }
michael@0 316 else {
michael@0 317 newToken = new Token(mPosition, Token::L_PAREN);
michael@0 318 }
michael@0 319 ++mPosition;
michael@0 320 break;
michael@0 321 case R_PAREN:
michael@0 322 newToken = new Token(mPosition, Token::R_PAREN);
michael@0 323 ++mPosition;
michael@0 324 break;
michael@0 325 case L_BRACKET:
michael@0 326 newToken = new Token(mPosition, Token::L_BRACKET);
michael@0 327 ++mPosition;
michael@0 328 break;
michael@0 329 case R_BRACKET:
michael@0 330 newToken = new Token(mPosition, Token::R_BRACKET);
michael@0 331 ++mPosition;
michael@0 332 break;
michael@0 333 case COMMA:
michael@0 334 newToken = new Token(mPosition, Token::COMMA);
michael@0 335 ++mPosition;
michael@0 336 break;
michael@0 337 case AT_SIGN :
michael@0 338 newToken = new Token(mPosition, Token::AT_SIGN);
michael@0 339 ++mPosition;
michael@0 340 break;
michael@0 341 case PLUS:
michael@0 342 newToken = new Token(mPosition, Token::ADDITION_OP);
michael@0 343 ++mPosition;
michael@0 344 break;
michael@0 345 case VERT_BAR:
michael@0 346 newToken = new Token(mPosition, Token::UNION_OP);
michael@0 347 ++mPosition;
michael@0 348 break;
michael@0 349 default:
michael@0 350 // Error, don't grok character :-(
michael@0 351 return NS_ERROR_XPATH_ILLEGAL_CHAR;
michael@0 352 }
michael@0 353 }
michael@0 354 if (isToken) {
michael@0 355 NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
michael@0 356 NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
michael@0 357 prevToken = newToken;
michael@0 358 addToken(newToken);
michael@0 359 }
michael@0 360 }
michael@0 361
michael@0 362 // add a endToken to the list
michael@0 363 newToken = new Token(end, end, Token::END);
michael@0 364 if (!newToken) {
michael@0 365 return NS_ERROR_OUT_OF_MEMORY;
michael@0 366 }
michael@0 367 addToken(newToken);
michael@0 368
michael@0 369 return NS_OK;
michael@0 370 }

mercurial