// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// disassembler_x86.h: Basic x86 bytecode disassembler
//
// Provides a simple disassembler which wraps libdisasm.
This allows simple michael@0: // tests to be run against bytecode to test for various properties. michael@0: // michael@0: // Author: Cris Neckar michael@0: michael@0: #ifndef GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_ michael@0: #define GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_ michael@0: michael@0: #include michael@0: #include michael@0: michael@0: #include "google_breakpad/common/breakpad_types.h" michael@0: michael@0: namespace libdis { michael@0: #include "third_party/libdisasm/libdis.h" michael@0: } michael@0: michael@0: namespace google_breakpad { michael@0: michael@0: enum { michael@0: DISX86_NONE = 0x0, michael@0: DISX86_BAD_BRANCH_TARGET = 0x1, michael@0: DISX86_BAD_ARGUMENT_PASSED = 0x2, michael@0: DISX86_BAD_WRITE = 0x4, michael@0: DISX86_BAD_BLOCK_WRITE = 0x8, michael@0: DISX86_BAD_READ = 0x10, michael@0: DISX86_BAD_BLOCK_READ = 0x20, michael@0: DISX86_BAD_COMPARISON = 0x40 michael@0: }; michael@0: michael@0: class DisassemblerX86 { michael@0: public: michael@0: // TODO(cdn): Modify this class to take a MemoryRegion instead of just michael@0: // a raw buffer. This will make it easier to use this on arbitrary michael@0: // minidumps without first copying out the code segment. michael@0: DisassemblerX86(const uint8_t *bytecode, uint32_t, uint32_t); michael@0: ~DisassemblerX86(); michael@0: michael@0: // This walks to the next instruction in the memory region and michael@0: // sets flags based on the type of instruction and previous state michael@0: // including any registers marked as bad through setBadRead() michael@0: // or setBadWrite(). This method can be called in a loop to michael@0: // disassemble until the end of a region. michael@0: uint32_t NextInstruction(); michael@0: michael@0: // Indicates whether the current disassembled instruction was valid. 
michael@0: bool currentInstructionValid() { return instr_valid_; } michael@0: michael@0: // Returns the current instruction as defined in libdis.h, michael@0: // or NULL if the current instruction is not valid. michael@0: const libdis::x86_insn_t* currentInstruction() { michael@0: return instr_valid_ ? ¤t_instr_ : NULL; michael@0: } michael@0: michael@0: // Returns the type of the current instruction as defined in libdis.h. michael@0: libdis::x86_insn_group currentInstructionGroup() { michael@0: return current_instr_.group; michael@0: } michael@0: michael@0: // Indicates whether a return instruction has been encountered. michael@0: bool endOfBlock() { return end_of_block_; } michael@0: michael@0: // The flags set so far for the disassembly. michael@0: uint16_t flags() { return flags_; } michael@0: michael@0: // This sets an indicator that the register used to determine michael@0: // src or dest for the current instruction is tainted. These can michael@0: // be used after examining the current instruction to indicate, michael@0: // for example that a bad read or write occurred and the pointer michael@0: // stored in the register is currently invalid. michael@0: bool setBadRead(); michael@0: bool setBadWrite(); michael@0: michael@0: protected: michael@0: const uint8_t *bytecode_; michael@0: uint32_t size_; michael@0: uint32_t virtual_address_; michael@0: uint32_t current_byte_offset_; michael@0: uint32_t current_inst_offset_; michael@0: michael@0: bool instr_valid_; michael@0: libdis::x86_insn_t current_instr_; michael@0: michael@0: // TODO(cdn): Maybe also track an expression's index register. michael@0: // ex: mov eax, [ebx + ecx]; ebx is base, ecx is index. 
michael@0: bool register_valid_; michael@0: libdis::x86_reg_t bad_register_; michael@0: michael@0: bool pushed_bad_value_; michael@0: bool end_of_block_; michael@0: michael@0: uint16_t flags_; michael@0: }; michael@0: michael@0: } // namespace google_breakpad michael@0: michael@0: #endif // GOOGLE_BREAKPAD_PROCESSOR_DISASSEMBLER_X86_H_