dom/xslt/xpath/txExprLexer.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/dom/xslt/xpath/txExprLexer.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,370 @@
     1.4 +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/**
    1.10 + * Lexical analyzer for XPath expressions
    1.11 + */
    1.12 +
    1.13 +#include "txExprLexer.h"
    1.14 +#include "nsGkAtoms.h"
    1.15 +#include "nsString.h"
    1.16 +#include "nsError.h"
    1.17 +#include "txXMLUtils.h"
    1.18 +
    1.19 +/**
    1.20 + * Creates a new ExprLexer
    1.21 + */
    1.22 +txExprLexer::txExprLexer()
    1.23 +  : mCurrentItem(nullptr),
    1.24 +    mFirstItem(nullptr),
    1.25 +    mLastItem(nullptr),
    1.26 +    mTokenCount(0)
    1.27 +{
    1.28 +}
    1.29 +
    1.30 +/**
    1.31 + * Destroys this instance of an txExprLexer
    1.32 + */
    1.33 +txExprLexer::~txExprLexer()
    1.34 +{
    1.35 +  //-- delete tokens
    1.36 +  Token* tok = mFirstItem;
    1.37 +  while (tok) {
    1.38 +    Token* temp = tok->mNext;
    1.39 +    delete tok;
    1.40 +    tok = temp;
    1.41 +  }
    1.42 +  mCurrentItem = nullptr;
    1.43 +}
    1.44 +
    1.45 +Token*
    1.46 +txExprLexer::nextToken()
    1.47 +{
    1.48 +  if (!mCurrentItem) {
    1.49 +    NS_NOTREACHED("nextToken called on uninitialized lexer");
    1.50 +    return nullptr;
    1.51 +  }
    1.52 +
    1.53 +  if (mCurrentItem->mType == Token::END) {
    1.54 +    // Do not progress beyond the end token
    1.55 +    return mCurrentItem;
    1.56 +  }
    1.57 +
    1.58 +  Token* token = mCurrentItem;
    1.59 +  mCurrentItem = mCurrentItem->mNext;
    1.60 +  return token;
    1.61 +}
    1.62 +
    1.63 +void
    1.64 +txExprLexer::addToken(Token* aToken)
    1.65 +{
    1.66 +  if (mLastItem) {
    1.67 +    mLastItem->mNext = aToken;
    1.68 +  }
    1.69 +  if (!mFirstItem) {
    1.70 +    mFirstItem = aToken;
    1.71 +    mCurrentItem = aToken;
    1.72 +  }
    1.73 +  mLastItem = aToken;
    1.74 +  ++mTokenCount;
    1.75 +}
    1.76 +
    1.77 +/**
    1.78 + * Returns true if the following Token should be an operator.
    1.79 + * This is a helper for the first bullet of [XPath 3.7]
    1.80 + *  Lexical Structure
    1.81 + */
    1.82 +bool
    1.83 +txExprLexer::nextIsOperatorToken(Token* aToken)
    1.84 +{
    1.85 +  if (!aToken || aToken->mType == Token::NULL_TOKEN) {
    1.86 +    return false;
    1.87 +  }
    1.88 +  /* This relies on the tokens having the right order in txExprLexer.h */
    1.89 +  return aToken->mType < Token::COMMA ||
    1.90 +    aToken->mType > Token::UNION_OP;
    1.91 +
    1.92 +}
    1.93 +
    1.94 +/**
    1.95 + * Parses the given string into a sequence of Tokens
    1.96 + */
    1.97 +nsresult
    1.98 +txExprLexer::parse(const nsASingleFragmentString& aPattern)
    1.99 +{
   1.100 +  iterator start, end;
   1.101 +  start = aPattern.BeginReading(mPosition);
   1.102 +  aPattern.EndReading(end);
   1.103 +
   1.104 +  //-- initialize previous token, this will automatically get
   1.105 +  //-- deleted when it goes out of scope
   1.106 +  Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
   1.107 +
   1.108 +  Token::Type defType;
   1.109 +  Token* newToken = nullptr;
   1.110 +  Token* prevToken = &nullToken;
   1.111 +  bool isToken;
   1.112 +
   1.113 +  while (mPosition < end) {
   1.114 +
   1.115 +    defType = Token::CNAME;
   1.116 +    isToken = true;
   1.117 +
   1.118 +    if (*mPosition == DOLLAR_SIGN) {
   1.119 +      if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
   1.120 +        return NS_ERROR_XPATH_INVALID_VAR_NAME;
   1.121 +      }
   1.122 +      defType = Token::VAR_REFERENCE;
   1.123 +    } 
   1.124 +    // just reuse the QName parsing, which will use defType 
   1.125 +    // the token to construct
   1.126 +
   1.127 +    if (XMLUtils::isLetter(*mPosition)) {
   1.128 +      // NCName, can get QName or OperatorName;
   1.129 +      //  FunctionName, NodeName, and AxisSpecifier may want whitespace,
   1.130 +      //  and are dealt with below
   1.131 +      start = mPosition;
   1.132 +      while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
   1.133 +        /* just go */
   1.134 +      }
   1.135 +      if (mPosition < end && *mPosition == COLON) {
   1.136 +        // try QName or wildcard, might need to step back for axis
   1.137 +        if (++mPosition == end) {
   1.138 +          return NS_ERROR_XPATH_UNEXPECTED_END;
   1.139 +        }
   1.140 +        if (XMLUtils::isLetter(*mPosition)) {
   1.141 +          while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
   1.142 +            /* just go */
   1.143 +          }
   1.144 +        }
   1.145 +        else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
   1.146 +          // eat wildcard for NameTest, bail for var ref at COLON
   1.147 +          ++mPosition;
   1.148 +        }
   1.149 +        else {
   1.150 +          --mPosition; // step back
   1.151 +        }
   1.152 +      }
   1.153 +      if (nextIsOperatorToken(prevToken)) {
   1.154 +        nsDependentSubstring op(Substring(start, mPosition));
   1.155 +        if (nsGkAtoms::_and->Equals(op)) {
   1.156 +          defType = Token::AND_OP;
   1.157 +        }
   1.158 +        else if (nsGkAtoms::_or->Equals(op)) {
   1.159 +          defType = Token::OR_OP;
   1.160 +        }
   1.161 +        else if (nsGkAtoms::mod->Equals(op)) {
   1.162 +          defType = Token::MODULUS_OP;
   1.163 +        }
   1.164 +        else if (nsGkAtoms::div->Equals(op)) {
   1.165 +          defType = Token::DIVIDE_OP;
   1.166 +        }
   1.167 +        else {
   1.168 +          // XXX QUESTION: spec is not too precise
   1.169 +          // badops is sure an error, but is bad:ops, too? We say yes!
   1.170 +          return NS_ERROR_XPATH_OPERATOR_EXPECTED;
   1.171 +        }
   1.172 +      }
   1.173 +      newToken = new Token(start, mPosition, defType);
   1.174 +    }
   1.175 +    else if (isXPathDigit(*mPosition)) {
   1.176 +      start = mPosition;
   1.177 +      while (++mPosition < end && isXPathDigit(*mPosition)) {
   1.178 +        /* just go */
   1.179 +      }
   1.180 +      if (mPosition < end && *mPosition == '.') {
   1.181 +        while (++mPosition < end && isXPathDigit(*mPosition)) {
   1.182 +          /* just go */
   1.183 +        }
   1.184 +      }
   1.185 +      newToken = new Token(start, mPosition, Token::NUMBER);
   1.186 +    }
   1.187 +    else {
   1.188 +      switch (*mPosition) {
   1.189 +        //-- ignore whitespace
   1.190 +      case SPACE:
   1.191 +      case TX_TAB:
   1.192 +      case TX_CR:
   1.193 +      case TX_LF:
   1.194 +        ++mPosition;
   1.195 +        isToken = false;
   1.196 +        break;
   1.197 +      case S_QUOTE :
   1.198 +      case D_QUOTE :
   1.199 +        start = mPosition;
   1.200 +        while (++mPosition < end && *mPosition != *start) {
   1.201 +          // eat literal
   1.202 +        }
   1.203 +        if (mPosition == end) {
   1.204 +          mPosition = start;
   1.205 +          return NS_ERROR_XPATH_UNCLOSED_LITERAL;
   1.206 +        }
   1.207 +        newToken = new Token(start + 1, mPosition, Token::LITERAL);
   1.208 +        ++mPosition;
   1.209 +        break;
   1.210 +      case PERIOD:
   1.211 +        // period can be .., .(DIGITS)+ or ., check next
   1.212 +        if (++mPosition == end) {
   1.213 +          newToken = new Token(mPosition - 1, Token::SELF_NODE);
   1.214 +        }
   1.215 +        else if (isXPathDigit(*mPosition)) {
   1.216 +          start = mPosition - 1;
   1.217 +          while (++mPosition < end && isXPathDigit(*mPosition)) {
   1.218 +            /* just go */
   1.219 +          }
   1.220 +          newToken = new Token(start, mPosition, Token::NUMBER);
   1.221 +        }
   1.222 +        else if (*mPosition == PERIOD) {
   1.223 +          ++mPosition;
   1.224 +          newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
   1.225 +        }
   1.226 +        else {
   1.227 +          newToken = new Token(mPosition - 1, Token::SELF_NODE);
   1.228 +        }
   1.229 +        break;
   1.230 +      case COLON: // QNames are dealt above, must be axis ident
   1.231 +        if (++mPosition >= end || *mPosition != COLON ||
   1.232 +            prevToken->mType != Token::CNAME) {
   1.233 +          return NS_ERROR_XPATH_BAD_COLON;
   1.234 +        }
   1.235 +        prevToken->mType = Token::AXIS_IDENTIFIER;
   1.236 +        ++mPosition;
   1.237 +        isToken = false;
   1.238 +        break;
   1.239 +      case FORWARD_SLASH :
   1.240 +        if (++mPosition < end && *mPosition == FORWARD_SLASH) {
   1.241 +          ++mPosition;
   1.242 +          newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
   1.243 +        }
   1.244 +        else {
   1.245 +          newToken = new Token(mPosition - 1, Token::PARENT_OP);
   1.246 +        }
   1.247 +        break;
   1.248 +      case BANG : // can only be !=
   1.249 +        if (++mPosition < end && *mPosition == EQUAL) {
   1.250 +          ++mPosition;
   1.251 +          newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
   1.252 +          break;
   1.253 +        }
   1.254 +        // Error ! is not not()
   1.255 +        return NS_ERROR_XPATH_BAD_BANG;
   1.256 +      case EQUAL:
   1.257 +        newToken = new Token(mPosition, Token::EQUAL_OP);
   1.258 +        ++mPosition;
   1.259 +        break;
   1.260 +      case L_ANGLE:
   1.261 +        if (++mPosition == end) {
   1.262 +          return NS_ERROR_XPATH_UNEXPECTED_END;
   1.263 +        }
   1.264 +        if (*mPosition == EQUAL) {
   1.265 +          ++mPosition;
   1.266 +          newToken = new Token(mPosition - 2, mPosition,
   1.267 +                               Token::LESS_OR_EQUAL_OP);
   1.268 +        }
   1.269 +        else {
   1.270 +          newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
   1.271 +        }
   1.272 +        break;
   1.273 +      case R_ANGLE:
   1.274 +        if (++mPosition == end) {
   1.275 +          return NS_ERROR_XPATH_UNEXPECTED_END;
   1.276 +        }
   1.277 +        if (*mPosition == EQUAL) {
   1.278 +          ++mPosition;
   1.279 +          newToken = new Token(mPosition - 2, mPosition,
   1.280 +                               Token::GREATER_OR_EQUAL_OP);
   1.281 +        }
   1.282 +        else {
   1.283 +          newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
   1.284 +        }
   1.285 +        break;
   1.286 +      case HYPHEN :
   1.287 +        newToken = new Token(mPosition, Token::SUBTRACTION_OP);
   1.288 +        ++mPosition;
   1.289 +        break;
   1.290 +      case ASTERIX:
   1.291 +        if (nextIsOperatorToken(prevToken)) {
   1.292 +          newToken = new Token(mPosition, Token::MULTIPLY_OP);
   1.293 +        }
   1.294 +        else {
   1.295 +          newToken = new Token(mPosition, Token::CNAME);
   1.296 +        }
   1.297 +        ++mPosition;
   1.298 +        break;
   1.299 +      case L_PAREN:
   1.300 +        if (prevToken->mType == Token::CNAME) {
   1.301 +          const nsDependentSubstring& val = prevToken->Value();
   1.302 +          if (val.EqualsLiteral("comment")) {
   1.303 +            prevToken->mType = Token::COMMENT_AND_PAREN;
   1.304 +          }
   1.305 +          else if (val.EqualsLiteral("node")) {
   1.306 +            prevToken->mType = Token::NODE_AND_PAREN;
   1.307 +          }
   1.308 +          else if (val.EqualsLiteral("processing-instruction")) {
   1.309 +            prevToken->mType = Token::PROC_INST_AND_PAREN;
   1.310 +          }
   1.311 +          else if (val.EqualsLiteral("text")) {
   1.312 +            prevToken->mType = Token::TEXT_AND_PAREN;
   1.313 +          }
   1.314 +          else {
   1.315 +            prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
   1.316 +          }
   1.317 +          isToken = false;
   1.318 +        }
   1.319 +        else {
   1.320 +          newToken = new Token(mPosition, Token::L_PAREN);
   1.321 +        }
   1.322 +        ++mPosition;
   1.323 +        break;
   1.324 +      case R_PAREN:
   1.325 +        newToken = new Token(mPosition, Token::R_PAREN);
   1.326 +        ++mPosition;
   1.327 +        break;
   1.328 +      case L_BRACKET:
   1.329 +        newToken = new Token(mPosition, Token::L_BRACKET);
   1.330 +        ++mPosition;
   1.331 +        break;
   1.332 +      case R_BRACKET:
   1.333 +        newToken = new Token(mPosition, Token::R_BRACKET);
   1.334 +        ++mPosition;
   1.335 +        break;
   1.336 +      case COMMA:
   1.337 +        newToken = new Token(mPosition, Token::COMMA);
   1.338 +        ++mPosition;
   1.339 +        break;
   1.340 +      case AT_SIGN :
   1.341 +        newToken = new Token(mPosition, Token::AT_SIGN);
   1.342 +        ++mPosition;
   1.343 +        break;
   1.344 +      case PLUS:
   1.345 +        newToken = new Token(mPosition, Token::ADDITION_OP);
   1.346 +        ++mPosition;
   1.347 +        break;
   1.348 +      case VERT_BAR:
   1.349 +        newToken = new Token(mPosition, Token::UNION_OP);
   1.350 +        ++mPosition;
   1.351 +        break;
   1.352 +      default:
   1.353 +        // Error, don't grok character :-(
   1.354 +        return NS_ERROR_XPATH_ILLEGAL_CHAR;
   1.355 +      }
   1.356 +    }
   1.357 +    if (isToken) {
   1.358 +      NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
   1.359 +      NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
   1.360 +      prevToken = newToken;
   1.361 +      addToken(newToken);
   1.362 +    }
   1.363 +  }
   1.364 +
   1.365 +  // add a endToken to the list
   1.366 +  newToken = new Token(end, end, Token::END);
   1.367 +  if (!newToken) {
   1.368 +    return NS_ERROR_OUT_OF_MEMORY;
   1.369 +  }
   1.370 +  addToken(newToken);
   1.371 +
   1.372 +  return NS_OK;
   1.373 +}

mercurial