michael@0: // -*- mode: c++ -*- michael@0: michael@0: // Copyright (c) 2010 Google Inc. michael@0: // All rights reserved. michael@0: // michael@0: // Redistribution and use in source and binary forms, with or without michael@0: // modification, are permitted provided that the following conditions are michael@0: // met: michael@0: // michael@0: // * Redistributions of source code must retain the above copyright michael@0: // notice, this list of conditions and the following disclaimer. michael@0: // * Redistributions in binary form must reproduce the above michael@0: // copyright notice, this list of conditions and the following disclaimer michael@0: // in the documentation and/or other materials provided with the michael@0: // distribution. michael@0: // * Neither the name of Google Inc. nor the names of its michael@0: // contributors may be used to endorse or promote products derived from michael@0: // this software without specific prior written permission. michael@0: // michael@0: // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS michael@0: // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT michael@0: // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR michael@0: // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT michael@0: // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, michael@0: // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT michael@0: // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, michael@0: // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY michael@0: // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT michael@0: // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE michael@0: // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. michael@0: michael@0: // postfix_evaluator-inl.h: Postfix (reverse Polish) notation expression michael@0: // evaluator. michael@0: // michael@0: // Documentation in postfix_evaluator.h. michael@0: // michael@0: // Author: Mark Mentovai michael@0: michael@0: #ifndef PROCESSOR_POSTFIX_EVALUATOR_INL_H__ michael@0: #define PROCESSOR_POSTFIX_EVALUATOR_INL_H__ michael@0: michael@0: #include "processor/postfix_evaluator.h" michael@0: michael@0: #include michael@0: michael@0: #include michael@0: michael@0: #include "google_breakpad/processor/memory_region.h" michael@0: #include "common/logging.h" michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: using std::istringstream; michael@0: using std::ostringstream; michael@0: michael@0: michael@0: // A small class used in Evaluate to make sure to clean up the stack michael@0: // before returning failure. michael@0: template michael@0: class AutoStackClearer { michael@0: public: michael@0: explicit AutoStackClearer(vector > *stack) michael@0: : stack_(stack) {} michael@0: ~AutoStackClearer() { stack_->clear(); } michael@0: michael@0: private: michael@0: vector > *stack_; michael@0: }; michael@0: michael@0: michael@0: template michael@0: bool PostfixEvaluator::EvaluateToken( michael@0: const string &token, michael@0: const string &expression, michael@0: DictionaryValidityType *assigned) { michael@0: // There are enough binary operations that do exactly the same thing michael@0: // (other than the specific operation, of course) that it makes sense michael@0: // to share as much code as possible. michael@0: enum BinaryOperation { michael@0: BINARY_OP_NONE = 0, michael@0: BINARY_OP_ADD, michael@0: BINARY_OP_SUBTRACT, michael@0: BINARY_OP_MULTIPLY, michael@0: BINARY_OP_DIVIDE_QUOTIENT, michael@0: BINARY_OP_DIVIDE_MODULUS, michael@0: BINARY_OP_ALIGN michael@0: }; michael@0: michael@0: BinaryOperation operation = BINARY_OP_NONE; michael@0: if (token == "+") michael@0: operation = BINARY_OP_ADD; michael@0: else if (token == "-") michael@0: operation = BINARY_OP_SUBTRACT; michael@0: else if (token == "*") michael@0: operation = BINARY_OP_MULTIPLY; michael@0: else if (token == "/") michael@0: operation = BINARY_OP_DIVIDE_QUOTIENT; michael@0: else if (token == "%") michael@0: operation = BINARY_OP_DIVIDE_MODULUS; michael@0: else if (token == "@") michael@0: operation = BINARY_OP_ALIGN; michael@0: michael@0: if (operation != BINARY_OP_NONE) { michael@0: // Get the operands. michael@0: ValueType operand1 = ValueType(); michael@0: ValueType operand2 = ValueType(); michael@0: if (!PopValues(&operand1, &operand2)) { michael@0: BPLOG(ERROR) << "Could not PopValues to get two values for binary " michael@0: "operation " << token << ": " << expression; michael@0: return false; michael@0: } michael@0: michael@0: // Perform the operation. michael@0: ValueType result; michael@0: switch (operation) { michael@0: case BINARY_OP_ADD: michael@0: result = operand1 + operand2; michael@0: break; michael@0: case BINARY_OP_SUBTRACT: michael@0: result = operand1 - operand2; michael@0: break; michael@0: case BINARY_OP_MULTIPLY: michael@0: result = operand1 * operand2; michael@0: break; michael@0: case BINARY_OP_DIVIDE_QUOTIENT: michael@0: result = operand1 / operand2; michael@0: break; michael@0: case BINARY_OP_DIVIDE_MODULUS: michael@0: result = operand1 % operand2; michael@0: break; michael@0: case BINARY_OP_ALIGN: michael@0: result = michael@0: operand1 & (static_cast(-1) ^ (operand2 - 1)); michael@0: break; michael@0: case BINARY_OP_NONE: michael@0: // This will not happen, but compilers will want a default or michael@0: // BINARY_OP_NONE case. michael@0: BPLOG(ERROR) << "Not reached!"; michael@0: return false; michael@0: break; michael@0: } michael@0: michael@0: // Save the result. michael@0: PushValue(result); michael@0: } else if (token == "^") { michael@0: // ^ for unary dereference. Can't dereference without memory. michael@0: if (!memory_) { michael@0: BPLOG(ERROR) << "Attempt to dereference without memory: " << michael@0: expression; michael@0: return false; michael@0: } michael@0: michael@0: ValueType address; michael@0: if (!PopValue(&address)) { michael@0: BPLOG(ERROR) << "Could not PopValue to get value to dereference: " << michael@0: expression; michael@0: return false; michael@0: } michael@0: michael@0: ValueType value; michael@0: if (!memory_->GetMemoryAtAddress(address, &value)) { michael@0: BPLOG(ERROR) << "Could not dereference memory at address " << michael@0: HexString(address) << ": " << expression; michael@0: return false; michael@0: } michael@0: michael@0: PushValue(value); michael@0: } else if (token == "=") { michael@0: // = for assignment. michael@0: ValueType value; michael@0: if (!PopValue(&value)) { michael@0: BPLOG(INFO) << "Could not PopValue to get value to assign: " << michael@0: expression; michael@0: return false; michael@0: } michael@0: michael@0: // Assignment is only meaningful when assigning into an identifier. michael@0: // The identifier must name a variable, not a constant. Variables michael@0: // begin with '$'. michael@0: const UniqueString* identifier; michael@0: if (PopValueOrIdentifier(NULL, &identifier) != POP_RESULT_IDENTIFIER) { michael@0: BPLOG(ERROR) << "PopValueOrIdentifier returned a value, but an " michael@0: "identifier is needed to assign " << michael@0: HexString(value) << ": " << expression; michael@0: return false; michael@0: } michael@0: if (identifier == ustr__empty() || Index(identifier,0) != '$') { michael@0: BPLOG(ERROR) << "Can't assign " << HexString(value) << " to " << michael@0: identifier << ": " << expression; michael@0: return false; michael@0: } michael@0: michael@0: dictionary_->set(identifier, value); michael@0: if (assigned) michael@0: assigned->set(identifier, true); michael@0: } else { michael@0: // Push it onto the stack as-is, but first convert it either to a michael@0: // ValueType (if a literal) or to a UniqueString* (if an identifier). michael@0: // michael@0: // First, try to treat the value as a literal. Literals may have leading michael@0: // '-' sign, and the entire remaining string must be parseable as michael@0: // ValueType. If this isn't possible, it can't be a literal, so treat it michael@0: // as an identifier instead. michael@0: // michael@0: // Some versions of the libstdc++, the GNU standard C++ library, have michael@0: // stream extractors for unsigned integer values that permit a leading michael@0: // '-' sign (6.0.13); others do not (6.0.9). Since we require it, we michael@0: // handle it explicitly here. michael@0: istringstream token_stream(token); michael@0: ValueType literal = ValueType(); michael@0: bool negative = false; michael@0: if (token_stream.peek() == '-') { michael@0: negative = true; michael@0: token_stream.get(); michael@0: } michael@0: if (token_stream >> literal && token_stream.peek() == EOF) { michael@0: PushValue(negative ? (-literal) : literal); michael@0: } else { michael@0: PushIdentifier(ToUniqueString(token)); michael@0: } michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: template michael@0: bool PostfixEvaluator::EvaluateInternal( michael@0: const string &expression, michael@0: DictionaryValidityType *assigned) { michael@0: // Tokenize, splitting on whitespace. michael@0: istringstream stream(expression); michael@0: string token; michael@0: while (stream >> token) { michael@0: // Normally, tokens are whitespace-separated, but occasionally, the michael@0: // assignment operator is smashed up against the next token, i.e. michael@0: // $T0 $ebp 128 + =$eip $T0 4 + ^ =$ebp $T0 ^ = michael@0: // This has been observed in program strings produced by MSVS 2010 in LTO michael@0: // mode. michael@0: if (token.size() > 1 && token[0] == '=') { michael@0: if (!EvaluateToken("=", expression, assigned)) { michael@0: return false; michael@0: } michael@0: michael@0: if (!EvaluateToken(token.substr(1), expression, assigned)) { michael@0: return false; michael@0: } michael@0: } else if (!EvaluateToken(token, expression, assigned)) { michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: template michael@0: bool PostfixEvaluator::Evaluate(const Module::Expr& expr, michael@0: DictionaryValidityType* assigned) { michael@0: // The expression is being exevaluated only for its side effects. Skip michael@0: // expressions that denote values only. michael@0: if (expr.how_ != Module::kExprPostfix) { michael@0: BPLOG(ERROR) << "Can't evaluate for side-effects: " << expr; michael@0: return false; michael@0: } michael@0: michael@0: // Ensure that the stack is cleared before returning. michael@0: AutoStackClearer clearer(&stack_); michael@0: michael@0: if (!EvaluateInternal(expr.postfix_, assigned)) michael@0: return false; michael@0: michael@0: // If there's anything left on the stack, it indicates incomplete execution. michael@0: // This is a failure case. If the stack is empty, evalution was complete michael@0: // and successful. michael@0: if (stack_.empty()) michael@0: return true; michael@0: michael@0: BPLOG(ERROR) << "Incomplete execution: " << expr; michael@0: return false; michael@0: } michael@0: michael@0: template michael@0: bool PostfixEvaluator::EvaluateForValue(const Module::Expr& expr, michael@0: ValueType* result) { michael@0: switch (expr.how_) { michael@0: michael@0: // Postfix expression. Give to the evaluator and return the michael@0: // one-and-only stack element that should be left over. michael@0: case Module::kExprPostfix: { michael@0: // Ensure that the stack is cleared before returning. michael@0: AutoStackClearer clearer(&stack_); michael@0: michael@0: if (!EvaluateInternal(expr.postfix_, NULL)) michael@0: return false; michael@0: michael@0: // A successful execution should leave exactly one value on the stack. michael@0: if (stack_.size() != 1) { michael@0: BPLOG(ERROR) << "Expression yielded bad number of results: " michael@0: << "'" << expr << "'"; michael@0: return false; michael@0: } michael@0: michael@0: return PopValue(result); michael@0: } michael@0: michael@0: // Simple-form expressions michael@0: case Module::kExprSimple: michael@0: case Module::kExprSimpleMem: { michael@0: // Look up the base value michael@0: bool found = false; michael@0: ValueType v = dictionary_->get(&found, expr.ident_); michael@0: if (!found) { michael@0: // The identifier wasn't found in the dictionary. Don't imply any michael@0: // default value, just fail. michael@0: static uint64_t n_complaints = 0; // This isn't threadsafe. michael@0: n_complaints++; michael@0: if (is_power_of_2(n_complaints)) { michael@0: BPLOG(INFO) << "Identifier " << FromUniqueString(expr.ident_) michael@0: << " not in dictionary (kExprSimple{Mem})" michael@0: << " (shown " << n_complaints << " times)"; michael@0: } michael@0: return false; michael@0: } michael@0: michael@0: // Form the sum michael@0: ValueType sum = v + (int64_t)expr.offset_; michael@0: michael@0: // and dereference if necessary michael@0: if (expr.how_ == Module::kExprSimpleMem) { michael@0: ValueType derefd; michael@0: if (!memory_ || !memory_->GetMemoryAtAddress(sum, &derefd)) { michael@0: return false; michael@0: } michael@0: *result = derefd; michael@0: } else { michael@0: *result = sum; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: default: michael@0: return false; michael@0: } michael@0: } michael@0: michael@0: michael@0: template michael@0: typename PostfixEvaluator::PopResult michael@0: PostfixEvaluator::PopValueOrIdentifier( michael@0: ValueType *value, const UniqueString** identifier) { michael@0: // There needs to be at least one element on the stack to pop. michael@0: if (!stack_.size()) michael@0: return POP_RESULT_FAIL; michael@0: michael@0: StackElem el = stack_.back(); michael@0: stack_.pop_back(); michael@0: michael@0: if (el.isValue) { michael@0: if (value) michael@0: *value = el.u.val; michael@0: return POP_RESULT_VALUE; michael@0: } else { michael@0: if (identifier) michael@0: *identifier = el.u.ustr; michael@0: return POP_RESULT_IDENTIFIER; michael@0: } michael@0: } michael@0: michael@0: michael@0: template michael@0: bool PostfixEvaluator::PopValue(ValueType *value) { michael@0: ValueType literal = ValueType(); michael@0: const UniqueString* token; michael@0: PopResult result; michael@0: if ((result = PopValueOrIdentifier(&literal, &token)) == POP_RESULT_FAIL) { michael@0: return false; michael@0: } else if (result == POP_RESULT_VALUE) { michael@0: // This is the easy case. michael@0: *value = literal; michael@0: } else { // result == POP_RESULT_IDENTIFIER michael@0: // There was an identifier at the top of the stack. Resolve it to a michael@0: // value by looking it up in the dictionary. michael@0: bool found = false; michael@0: ValueType v = dictionary_->get(&found, token); michael@0: if (!found) { michael@0: // The identifier wasn't found in the dictionary. Don't imply any michael@0: // default value, just fail. michael@0: BPLOG(INFO) << "Identifier " << FromUniqueString(token) michael@0: << " not in dictionary"; michael@0: return false; michael@0: } michael@0: michael@0: *value = v; michael@0: } michael@0: michael@0: return true; michael@0: } michael@0: michael@0: michael@0: template michael@0: bool PostfixEvaluator::PopValues(ValueType *value1, michael@0: ValueType *value2) { michael@0: return PopValue(value2) && PopValue(value1); michael@0: } michael@0: michael@0: michael@0: template michael@0: void PostfixEvaluator::PushValue(const ValueType &value) { michael@0: StackElem el(value); michael@0: stack_.push_back(el); michael@0: } michael@0: michael@0: template michael@0: void PostfixEvaluator::PushIdentifier(const UniqueString* str) { michael@0: StackElem el(str); michael@0: stack_.push_back(el); michael@0: } michael@0: michael@0: michael@0: } // namespace google_breakpad michael@0: michael@0: michael@0: #endif // PROCESSOR_POSTFIX_EVALUATOR_INL_H__