dom/xslt/xpath/txExprLexer.cpp

Tue, 06 Jan 2015 21:39:09 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Tue, 06 Jan 2015 21:39:09 +0100
branch
TOR_BUG_9701
changeset 8
97036ab72558
permissions
-rw-r--r--

Conditionally force memory storage according to privacy.thirdparty.isolate.
This solves Tor bug #9701, complying with the disk avoidance requirement
documented in
https://www.torproject.org/projects/torbrowser/design/#disk-avoidance.

     1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
     2 /* This Source Code Form is subject to the terms of the Mozilla Public
     3  * License, v. 2.0. If a copy of the MPL was not distributed with this
     4  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     6 /**
     7  * Lexical analyzer for XPath expressions
     8  */
    10 #include "txExprLexer.h"
    11 #include "nsGkAtoms.h"
    12 #include "nsString.h"
    13 #include "nsError.h"
    14 #include "txXMLUtils.h"
    16 /**
    17  * Default constructor: no tokens yet, so all list pointers are null.
    18  */
    19 txExprLexer::txExprLexer()
    20   : mCurrentItem(nullptr)
    21   , mFirstItem(nullptr)
    22   , mLastItem(nullptr)
    23   , mTokenCount(0)
    24 {
    25 }
    27 /**
    28  * Destructor: frees every Token reachable from the head of the list.
    29  */
    30 txExprLexer::~txExprLexer()
    31 {
    32   // Read each node's successor before deleting the node itself.
    33   for (Token* node = mFirstItem; node; ) {
    34     Token* following = node->mNext;
    35     delete node;
    36     node = following;
    37   }
    38   // The cursor pointed into the list that was just freed.
    39   mCurrentItem = nullptr;
    40 }
    42 /**
    43  * Returns the token under the cursor and moves the cursor one step on.
    44  * Asserts and returns nullptr when the cursor is null.
    45  */
    46 Token*
    47 txExprLexer::nextToken()
    48 {
    49   Token* const current = mCurrentItem;
    50   if (current == nullptr) {
    51     NS_NOTREACHED("nextToken called on uninitialized lexer");
    52     return nullptr;
    53   }
    54   // The END token is sticky: hand it back without advancing past it.
    55   if (current->mType != Token::END) {
    56     mCurrentItem = current->mNext;
    57   }
    58   return current;
    59 }
    60 // Appends aToken at the tail; the first token also becomes head and cursor.
    61 void
    62 txExprLexer::addToken(Token* aToken)
    63 {
    64   if (mFirstItem == nullptr) {
    65     mFirstItem = aToken;
    66     mCurrentItem = aToken;
    67   }
    68   if (mLastItem != nullptr) {
    69     mLastItem->mNext = aToken;
    70   }
    71   mLastItem = aToken;
    72   ++mTokenCount;
    73 }
    74 /**
    75  * Tells whether the token that follows aToken has to be an operator.
    76  * Helper for the first bullet of [XPath 3.7] Lexical Structure.
    77  * A null pointer or a NULL_TOKEN never demands an operator.
    78  */
    79 bool
    80 txExprLexer::nextIsOperatorToken(Token* aToken)
    81 {
    82   if (aToken == nullptr) {
    83     return false;
    84   }
    85   const auto kind = aToken->mType;
    86   // Depends on the order in which the token types appear in txExprLexer.h.
    87   return kind != Token::NULL_TOKEN &&
    88          (kind < Token::COMMA || kind > Token::UNION_OP);
    89 }
    91 /**
    92  * Parses the given string into a sequence of Tokens
        *
        * Scans aPattern from its first character to its end using mPosition as
        * the cursor, appends one Token per lexical item through addToken(), and
        * finishes by appending a Token::END token.
        *
        * Returns NS_OK once the END token has been appended. On the first
        * lexical error the matching NS_ERROR_XPATH_* code is returned right
        * away; tokens appended before the error stay in the list.
    93  */
    94 nsresult
    95 txExprLexer::parse(const nsASingleFragmentString& aPattern)
    96 {
    97   iterator start, end;
    98   start = aPattern.BeginReading(mPosition);
    99   aPattern.EndReading(end);
   101   //-- initialize previous token, this will automatically get
   102   //-- deleted when it goes out of scope
         //-- (it stands in for "no previous token", so prevToken is never null)
   103   Token nullToken(nullptr, nullptr, Token::NULL_TOKEN);
   105   Token::Type defType;
   106   Token* newToken = nullptr;
   107   Token* prevToken = &nullToken;
   108   bool isToken;
   110   while (mPosition < end) {
           // Defaults for this round: a plain name that produces a token.
   112     defType = Token::CNAME;
   113     isToken = true;
           // A '$' must be followed by a letter. The '$' is skipped here, so it
           // is not part of the token text built by the name branch below.
   115     if (*mPosition == DOLLAR_SIGN) {
   116       if (++mPosition == end || !XMLUtils::isLetter(*mPosition)) {
   117         return NS_ERROR_XPATH_INVALID_VAR_NAME;
   118       }
   119       defType = Token::VAR_REFERENCE;
   120     } 
   121     // just reuse the QName parsing, which will use defType as the type of
   122     // the token to construct
   124     if (XMLUtils::isLetter(*mPosition)) {
   125       // NCName, can get QName or OperatorName;
   126       //  FunctionName, NodeName, and AxisSpecifier may want whitespace,
   127       //  and are dealt with below
   128       start = mPosition;
   129       while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
   130         /* just go */
   131       }
   132       if (mPosition < end && *mPosition == COLON) {
   133         // try QName or wildcard, might need to step back for axis
   134         if (++mPosition == end) {
   135           return NS_ERROR_XPATH_UNEXPECTED_END;
   136         }
   137         if (XMLUtils::isLetter(*mPosition)) {
   138           while (++mPosition < end && XMLUtils::isNCNameChar(*mPosition)) {
   139             /* just go */
   140           }
   141         }
   142         else if (*mPosition == '*' && defType != Token::VAR_REFERENCE) {
   143           // eat wildcard for NameTest, bail for var ref at COLON
   144           ++mPosition;
   145         }
   146         else {
                 // Leave the cursor on the colon; the COLON case of the switch
                 // below handles it on the next round.
   147           --mPosition; // step back
   148         }
   149       }
             // Where the previous token calls for an operator, the name just
             // read must be one of the operator names and/or/mod/div.
   150       if (nextIsOperatorToken(prevToken)) {
   151         nsDependentSubstring op(Substring(start, mPosition));
   152         if (nsGkAtoms::_and->Equals(op)) {
   153           defType = Token::AND_OP;
   154         }
   155         else if (nsGkAtoms::_or->Equals(op)) {
   156           defType = Token::OR_OP;
   157         }
   158         else if (nsGkAtoms::mod->Equals(op)) {
   159           defType = Token::MODULUS_OP;
   160         }
   161         else if (nsGkAtoms::div->Equals(op)) {
   162           defType = Token::DIVIDE_OP;
   163         }
   164         else {
   165           // XXX QUESTION: spec is not too precise
   166           // "badops" is surely an error, but is "bad:ops", too? We say yes!
   167           return NS_ERROR_XPATH_OPERATOR_EXPECTED;
   168         }
   169       }
   170       newToken = new Token(start, mPosition, defType);
   171     }
   172     else if (isXPathDigit(*mPosition)) {
             // Number: digits, optionally followed by '.' and more digits.
   173       start = mPosition;
   174       while (++mPosition < end && isXPathDigit(*mPosition)) {
   175         /* just go */
   176       }
   177       if (mPosition < end && *mPosition == '.') {
   178         while (++mPosition < end && isXPathDigit(*mPosition)) {
   179           /* just go */
   180         }
   181       }
   182       newToken = new Token(start, mPosition, Token::NUMBER);
   183     }
   184     else {
   185       switch (*mPosition) {
   186         //-- ignore whitespace
   187       case SPACE:
   188       case TX_TAB:
   189       case TX_CR:
   190       case TX_LF:
   191         ++mPosition;
   192         isToken = false;
   193         break;
   194       case S_QUOTE :
   195       case D_QUOTE :
               // Scan up to the matching quote character (*start).
   196         start = mPosition;
   197         while (++mPosition < end && *mPosition != *start) {
   198           // eat literal
   199         }
   200         if (mPosition == end) {
                 // Rewind the cursor to the opening quote before failing
                 // (presumably so callers can report where the literal began
                 // -- TODO confirm).
   201           mPosition = start;
   202           return NS_ERROR_XPATH_UNCLOSED_LITERAL;
   203         }
               // The token text excludes both quote characters.
   204         newToken = new Token(start + 1, mPosition, Token::LITERAL);
   205         ++mPosition;
   206         break;
   207       case PERIOD:
   208         // period can be .., .(DIGITS)+ or ., check next
   209         if (++mPosition == end) {
   210           newToken = new Token(mPosition - 1, Token::SELF_NODE);
   211         }
   212         else if (isXPathDigit(*mPosition)) {
                 // The number token includes the leading period.
   213           start = mPosition - 1;
   214           while (++mPosition < end && isXPathDigit(*mPosition)) {
   215             /* just go */
   216           }
   217           newToken = new Token(start, mPosition, Token::NUMBER);
   218         }
   219         else if (*mPosition == PERIOD) {
   220           ++mPosition;
   221           newToken = new Token(mPosition - 2, mPosition, Token::PARENT_NODE);
   222         }
   223         else {
   224           newToken = new Token(mPosition - 1, Token::SELF_NODE);
   225         }
   226         break;
   227       case COLON: // QNames are dealt above, must be axis ident
               // Only "::" directly after a CNAME is accepted; it retypes that
               // previous token as an axis identifier and adds no new token.
   228         if (++mPosition >= end || *mPosition != COLON ||
   229             prevToken->mType != Token::CNAME) {
   230           return NS_ERROR_XPATH_BAD_COLON;
   231         }
   232         prevToken->mType = Token::AXIS_IDENTIFIER;
   233         ++mPosition;
   234         isToken = false;
   235         break;
   236       case FORWARD_SLASH :
               // "//" yields ANCESTOR_OP, a single "/" yields PARENT_OP.
   237         if (++mPosition < end && *mPosition == FORWARD_SLASH) {
   238           ++mPosition;
   239           newToken = new Token(mPosition - 2, mPosition, Token::ANCESTOR_OP);
   240         }
   241         else {
   242           newToken = new Token(mPosition - 1, Token::PARENT_OP);
   243         }
   244         break;
   245       case BANG : // can only be !=
   246         if (++mPosition < end && *mPosition == EQUAL) {
   247           ++mPosition;
   248           newToken = new Token(mPosition - 2, mPosition, Token::NOT_EQUAL_OP);
   249           break;
   250         }
   251         // Error: a lone '!' is not a substitute for not()
   252         return NS_ERROR_XPATH_BAD_BANG;
   253       case EQUAL:
   254         newToken = new Token(mPosition, Token::EQUAL_OP);
   255         ++mPosition;
   256         break;
   257       case L_ANGLE:
               // '<' as the very last character is an error.
   258         if (++mPosition == end) {
   259           return NS_ERROR_XPATH_UNEXPECTED_END;
   260         }
   261         if (*mPosition == EQUAL) {
   262           ++mPosition;
   263           newToken = new Token(mPosition - 2, mPosition,
   264                                Token::LESS_OR_EQUAL_OP);
   265         }
   266         else {
   267           newToken = new Token(mPosition - 1, Token::LESS_THAN_OP);
   268         }
   269         break;
   270       case R_ANGLE:
               // '>' as the very last character is an error.
   271         if (++mPosition == end) {
   272           return NS_ERROR_XPATH_UNEXPECTED_END;
   273         }
   274         if (*mPosition == EQUAL) {
   275           ++mPosition;
   276           newToken = new Token(mPosition - 2, mPosition,
   277                                Token::GREATER_OR_EQUAL_OP);
   278         }
   279         else {
   280           newToken = new Token(mPosition - 1, Token::GREATER_THAN_OP);
   281         }
   282         break;
   283       case HYPHEN :
   284         newToken = new Token(mPosition, Token::SUBTRACTION_OP);
   285         ++mPosition;
   286         break;
   287       case ASTERIX:
               // '*' is the multiply operator where an operator is expected,
               // otherwise it is lexed as a CNAME token.
   288         if (nextIsOperatorToken(prevToken)) {
   289           newToken = new Token(mPosition, Token::MULTIPLY_OP);
   290         }
   291         else {
   292           newToken = new Token(mPosition, Token::CNAME);
   293         }
   294         ++mPosition;
   295         break;
   296       case L_PAREN:
               // A '(' after a CNAME is folded into that token by retyping it
               // (one of the four *_AND_PAREN node-type kinds, or a function
               // name); no separate L_PAREN token is added in that case.
   297         if (prevToken->mType == Token::CNAME) {
   298           const nsDependentSubstring& val = prevToken->Value();
   299           if (val.EqualsLiteral("comment")) {
   300             prevToken->mType = Token::COMMENT_AND_PAREN;
   301           }
   302           else if (val.EqualsLiteral("node")) {
   303             prevToken->mType = Token::NODE_AND_PAREN;
   304           }
   305           else if (val.EqualsLiteral("processing-instruction")) {
   306             prevToken->mType = Token::PROC_INST_AND_PAREN;
   307           }
   308           else if (val.EqualsLiteral("text")) {
   309             prevToken->mType = Token::TEXT_AND_PAREN;
   310           }
   311           else {
   312             prevToken->mType = Token::FUNCTION_NAME_AND_PAREN;
   313           }
   314           isToken = false;
   315         }
   316         else {
   317           newToken = new Token(mPosition, Token::L_PAREN);
   318         }
   319         ++mPosition;
   320         break;
   321       case R_PAREN:
   322         newToken = new Token(mPosition, Token::R_PAREN);
   323         ++mPosition;
   324         break;
   325       case L_BRACKET:
   326         newToken = new Token(mPosition, Token::L_BRACKET);
   327         ++mPosition;
   328         break;
   329       case R_BRACKET:
   330         newToken = new Token(mPosition, Token::R_BRACKET);
   331         ++mPosition;
   332         break;
   333       case COMMA:
   334         newToken = new Token(mPosition, Token::COMMA);
   335         ++mPosition;
   336         break;
   337       case AT_SIGN :
   338         newToken = new Token(mPosition, Token::AT_SIGN);
   339         ++mPosition;
   340         break;
   341       case PLUS:
   342         newToken = new Token(mPosition, Token::ADDITION_OP);
   343         ++mPosition;
   344         break;
   345       case VERT_BAR:
   346         newToken = new Token(mPosition, Token::UNION_OP);
   347         ++mPosition;
   348         break;
   349       default:
   350         // Error: this character cannot start any token
   351         return NS_ERROR_XPATH_ILLEGAL_CHAR;
   352       }
   353     }
   354     if (isToken) {
             // newToken is not reset between rounds, so besides the null check
             // also refuse a pointer that is already the tail of the list.
   355       NS_ENSURE_TRUE(newToken, NS_ERROR_OUT_OF_MEMORY);
   356       NS_ENSURE_TRUE(newToken != mLastItem, NS_ERROR_FAILURE);
   357       prevToken = newToken;
   358       addToken(newToken);
   359     }
   360   }
   362   // append the END token to the list
   363   newToken = new Token(end, end, Token::END);
   364   if (!newToken) {
   365     return NS_ERROR_OUT_OF_MEMORY;
   366   }
   367   addToken(newToken);
   369   return NS_OK;
   370 }

mercurial