michael@0: // Copyright (c) 2012 The Chromium Authors. All rights reserved. michael@0: // Use of this source code is governed by a BSD-style license that can be michael@0: // found in the LICENSE file. michael@0: michael@0: // Definition of MiniDisassembler. michael@0: michael@0: #ifndef SANDBOX_SRC_SIDESTEP_MINI_DISASSEMBLER_H__ michael@0: #define SANDBOX_SRC_SIDESTEP_MINI_DISASSEMBLER_H__ michael@0: michael@0: #include "sandbox/win/src/sidestep/mini_disassembler_types.h" michael@0: michael@0: namespace sidestep { michael@0: michael@0: // This small disassembler is very limited michael@0: // in its functionality, and in fact does only the bare minimum required by the michael@0: // preamble patching utility. It may be useful for other purposes, however. michael@0: // michael@0: // The limitations include at least the following: michael@0: // -# No support for coprocessor opcodes, MMX, etc. michael@0: // -# No machine-readable identification of opcodes or decoding of michael@0: // assembly parameters. The name of the opcode (as a string) is given, michael@0: // however, to aid debugging. michael@0: // michael@0: // You may ask what this little disassembler actually does, then? The answer is michael@0: // that it does the following, which is exactly what the patching utility needs: michael@0: // -# Indicates if opcode is a jump (any kind) or a return (any kind) michael@0: // because this is important for the patching utility to determine if michael@0: // a function is too short or there are jumps too early in it for it michael@0: // to be preamble patched. michael@0: // -# The opcode length is always calculated, so that the patching utility michael@0: // can figure out where the next instruction starts, and whether it michael@0: // already has enough instructions to replace with the absolute jump michael@0: // to the patching code. michael@0: // michael@0: // The usage is quite simple; just create a MiniDisassembler and use its michael@0: // Disassemble() method. michael@0: // michael@0: // If you would like to extend this disassembler, please refer to the michael@0: // IA-32 Intel Architecture Software Developer's Manual Volume 2: michael@0: // Instruction Set Reference for information about operand decoding michael@0: // etc. michael@0: class MiniDisassembler { michael@0: public: michael@0: michael@0: // Creates a new instance and sets defaults. michael@0: // michael@0: // operand_default_32_bits: If true, the default operand size is michael@0: // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits. michael@0: // address_default_32_bits: If true, the default address size is michael@0: // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits. michael@0: MiniDisassembler(bool operand_default_32_bits, michael@0: bool address_default_32_bits); michael@0: michael@0: // Equivalent to MiniDisassembler(true, true); michael@0: MiniDisassembler(); michael@0: michael@0: // Attempts to disassemble a single instruction starting from the michael@0: // address in memory it is pointed to. michael@0: // michael@0: // start: Address where disassembly should start. michael@0: // instruction_bytes: Variable that will be incremented by michael@0: // the length in bytes of the instruction. michael@0: // Returns enItJump, enItReturn or enItGeneric on success. enItUnknown michael@0: // if unable to disassemble, enItUnused if this seems to be an unused michael@0: // opcode. In the last two (error) cases, cbInstruction will be set michael@0: // to 0xffffffff. michael@0: // michael@0: // Postcondition: This instance of the disassembler is ready to be used again, michael@0: // with unchanged defaults from creation time. michael@0: InstructionType Disassemble(unsigned char* start, michael@0: unsigned int* instruction_bytes); michael@0: michael@0: private: michael@0: michael@0: // Makes the disassembler ready for reuse. michael@0: void Initialize(); michael@0: michael@0: // Sets the flags for address and operand sizes. michael@0: // Returns Number of prefix bytes. michael@0: InstructionType ProcessPrefixes(unsigned char* start, unsigned int* size); michael@0: michael@0: // Sets the flag for whether we have ModR/M, and increments michael@0: // operand_bytes_ if any are specifies by the opcode directly. michael@0: // Returns Number of opcode bytes. michael@0: InstructionType ProcessOpcode(unsigned char* start, michael@0: unsigned int table, michael@0: unsigned int* size); michael@0: michael@0: // Checks the type of the supplied operand. Increments michael@0: // operand_bytes_ if it directly indicates an immediate etc. michael@0: // operand. Asserts have_modrm_ if the operand specifies michael@0: // a ModR/M byte. michael@0: bool ProcessOperand(int flag_operand); michael@0: michael@0: // Increments operand_bytes_ by size specified by ModR/M and michael@0: // by SIB if present. michael@0: // Returns 0 in case of error, 1 if there is just a ModR/M byte, michael@0: // 2 if there is a ModR/M byte and a SIB byte. michael@0: bool ProcessModrm(unsigned char* start, unsigned int* size); michael@0: michael@0: // Processes the SIB byte that it is pointed to. michael@0: // start: Pointer to the SIB byte. michael@0: // mod: The mod field from the ModR/M byte. michael@0: // Returns 1 to indicate success (indicates 1 SIB byte) michael@0: bool ProcessSib(unsigned char* start, unsigned char mod, unsigned int* size); michael@0: michael@0: // The instruction type we have decoded from the opcode. michael@0: InstructionType instruction_type_; michael@0: michael@0: // Counts the number of bytes that is occupied by operands in michael@0: // the current instruction (note: we don't care about how large michael@0: // operands stored in registers etc. are). michael@0: unsigned int operand_bytes_; michael@0: michael@0: // True iff there is a ModR/M byte in this instruction. michael@0: bool have_modrm_; michael@0: michael@0: // True iff we need to decode the ModR/M byte (sometimes it just michael@0: // points to a register, we can tell by the addressing mode). michael@0: bool should_decode_modrm_; michael@0: michael@0: // Current operand size is 32 bits if true, 16 bits if false. michael@0: bool operand_is_32_bits_; michael@0: michael@0: // Default operand size is 32 bits if true, 16 bits if false. michael@0: bool operand_default_is_32_bits_; michael@0: michael@0: // Current address size is 32 bits if true, 16 bits if false. michael@0: bool address_is_32_bits_; michael@0: michael@0: // Default address size is 32 bits if true, 16 bits if false. michael@0: bool address_default_is_32_bits_; michael@0: michael@0: // Huge big opcode table based on the IA-32 manual, defined michael@0: // in Ia32OpcodeMap.cpp michael@0: static const OpcodeTable s_ia32_opcode_map_[]; michael@0: michael@0: // Somewhat smaller table to help with decoding ModR/M bytes michael@0: // when 16-bit addressing mode is being used. Defined in michael@0: // Ia32ModrmMap.cpp michael@0: static const ModrmEntry s_ia16_modrm_map_[]; michael@0: michael@0: // Somewhat smaller table to help with decoding ModR/M bytes michael@0: // when 32-bit addressing mode is being used. Defined in michael@0: // Ia32ModrmMap.cpp michael@0: static const ModrmEntry s_ia32_modrm_map_[]; michael@0: michael@0: // Indicators of whether we got certain prefixes that certain michael@0: // silly Intel instructions depend on in nonstandard ways for michael@0: // their behaviors. michael@0: bool got_f2_prefix_, got_f3_prefix_, got_66_prefix_; michael@0: }; michael@0: michael@0: }; // namespace sidestep michael@0: michael@0: #endif // SANDBOX_SRC_SIDESTEP_MINI_DISASSEMBLER_H__