michael@0: // Copyright (c) 2012 The Chromium Authors. All rights reserved.
michael@0: // Use of this source code is governed by a BSD-style license that can be
michael@0: // found in the LICENSE file.
michael@0: 
michael@0: // Definition of MiniDisassembler.
michael@0: 
michael@0: #ifndef SANDBOX_SRC_SIDESTEP_MINI_DISASSEMBLER_H__
michael@0: #define SANDBOX_SRC_SIDESTEP_MINI_DISASSEMBLER_H__
michael@0: 
michael@0: #include "sandbox/win/src/sidestep/mini_disassembler_types.h"
michael@0: 
michael@0: namespace sidestep {
michael@0: 
michael@0: // This small disassembler is very limited
michael@0: // in its functionality, and in fact does only the bare minimum required by the
michael@0: // preamble patching utility.  It may be useful for other purposes, however.
michael@0: //
michael@0: // The limitations include at least the following:
michael@0: //  -# No support for coprocessor opcodes, MMX, etc.
michael@0: //  -# No machine-readable identification of opcodes or decoding of
michael@0: //     assembly parameters. The name of the opcode (as a string) is given,
michael@0: //     however, to aid debugging.
michael@0: //
michael@0: // You may ask what this little disassembler actually does, then?  The answer is
michael@0: // that it does the following, which is exactly what the patching utility needs:
michael@0: //  -# Indicates if opcode is a jump (any kind) or a return (any kind)
michael@0: //     because this is important for the patching utility to determine if
michael@0: //     a function is too short or there are jumps too early in it for it
michael@0: //     to be preamble patched.
michael@0: //  -# The opcode length is always calculated, so that the patching utility
michael@0: //     can figure out where the next instruction starts, and whether it
michael@0: //     already has enough instructions to replace with the absolute jump
michael@0: //     to the patching code.
michael@0: //
michael@0: // The usage is quite simple; just create a MiniDisassembler and use its
michael@0: // Disassemble() method.
michael@0: //
michael@0: // If you would like to extend this disassembler, please refer to the
michael@0: // IA-32 Intel Architecture Software Developer's Manual Volume 2:
michael@0: // Instruction Set Reference for information about operand decoding
michael@0: // etc.
michael@0: class MiniDisassembler {
michael@0:  public:
michael@0: 
michael@0:   // Creates a new instance and sets defaults.
michael@0:   //
michael@0:   // operand_default_32_bits: If true, the default operand size is
michael@0:   // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits.
michael@0:   // address_default_32_bits: If true, the default address size is
michael@0:   // set to 32 bits, which is the default under Win32. Otherwise it is 16 bits.
michael@0:   MiniDisassembler(bool operand_default_32_bits,
michael@0:                    bool address_default_32_bits);
michael@0: 
michael@0:   // Equivalent to MiniDisassembler(true, true);
michael@0:   MiniDisassembler();
michael@0: 
michael@0:   // Attempts to disassemble a single instruction starting from the
michael@0:   // address in memory it is pointed to.
michael@0:   //
michael@0:   // start: Address where disassembly should start.
michael@0:   // instruction_bytes: Variable that will be incremented by
michael@0:   // the length in bytes of the instruction.
michael@0:   // Returns enItJump, enItReturn or enItGeneric on success.  enItUnknown
michael@0:   // if unable to disassemble, enItUnused if this seems to be an unused
michael@0:   // opcode. In the last two (error) cases, *instruction_bytes will be set
michael@0:   // to 0xffffffff.
michael@0:   //
michael@0:   // Postcondition: This instance of the disassembler is ready to be used again,
michael@0:   // with unchanged defaults from creation time.
michael@0:   InstructionType Disassemble(unsigned char* start,
michael@0:                               unsigned int* instruction_bytes);
michael@0: 
michael@0:  private:
michael@0: 
michael@0:   // Makes the disassembler ready for reuse.
michael@0:   void Initialize();
michael@0: 
michael@0:   // Sets the flags for address and operand sizes.  Returns an
michael@0:   // InstructionType; prefix byte count presumably goes to *size (confirm).
michael@0:   InstructionType ProcessPrefixes(unsigned char* start, unsigned int* size);
michael@0: 
michael@0:   // Sets the flag for whether we have ModR/M, and increments
michael@0:   // operand_bytes_ if any are specified by the opcode directly.  Returns an
michael@0:   // InstructionType; opcode byte count presumably goes to *size (confirm).
michael@0:   InstructionType ProcessOpcode(unsigned char* start,
michael@0:                                 unsigned int table,
michael@0:                                 unsigned int* size);
michael@0: 
michael@0:   // Checks the type of the supplied operand.  Increments
michael@0:   // operand_bytes_ if it directly indicates an immediate etc.
michael@0:   // operand.  Sets have_modrm_ if the operand specifies a ModR/M
michael@0:   // byte.  Returns bool -- presumably true on success (confirm).
michael@0:   bool ProcessOperand(int flag_operand);
michael@0: 
michael@0:   // Increments operand_bytes_ by size specified by ModR/M and
michael@0:   // by SIB if present.
michael@0:   // NOTE(review): returns bool, not a count; the 1 (ModR/M only) or 2
michael@0:   // (ModR/M + SIB) byte count is presumably written to *size -- confirm.
michael@0:   bool ProcessModrm(unsigned char* start, unsigned int* size);
michael@0: 
michael@0:   // Processes the SIB byte that it is pointed to.
michael@0:   // start: Pointer to the SIB byte.
michael@0:   // mod: The mod field from the ModR/M byte.
michael@0:   // Returns bool -- presumably true on success, with *size set to 1 (confirm).
michael@0:   bool ProcessSib(unsigned char* start, unsigned char mod, unsigned int* size);
michael@0: 
michael@0:   // The instruction type we have decoded from the opcode.
michael@0:   InstructionType instruction_type_;
michael@0: 
michael@0:   // Counts the number of bytes that is occupied by operands in
michael@0:   // the current instruction (note: we don't care about how large
michael@0:   // operands stored in registers etc. are).
michael@0:   unsigned int operand_bytes_;
michael@0: 
michael@0:   // True iff there is a ModR/M byte in this instruction.
michael@0:   bool have_modrm_;
michael@0: 
michael@0:   // True iff we need to decode the ModR/M byte (sometimes it just
michael@0:   // points to a register, we can tell by the addressing mode).
michael@0:   bool should_decode_modrm_;
michael@0: 
michael@0:   // Current operand size is 32 bits if true, 16 bits if false.
michael@0:   bool operand_is_32_bits_;
michael@0: 
michael@0:   // Default operand size is 32 bits if true, 16 bits if false.
michael@0:   bool operand_default_is_32_bits_;
michael@0: 
michael@0:   // Current address size is 32 bits if true, 16 bits if false.
michael@0:   bool address_is_32_bits_;
michael@0: 
michael@0:   // Default address size is 32 bits if true, 16 bits if false.
michael@0:   bool address_default_is_32_bits_;
michael@0: 
michael@0:   // Huge big opcode table based on the IA-32 manual, defined
michael@0:   // in Ia32OpcodeMap.cpp
michael@0:   static const OpcodeTable s_ia32_opcode_map_[];
michael@0: 
michael@0:   // Somewhat smaller table to help with decoding ModR/M bytes
michael@0:   // when 16-bit addressing mode is being used.  Defined in
michael@0:   // Ia32ModrmMap.cpp
michael@0:   static const ModrmEntry s_ia16_modrm_map_[];
michael@0: 
michael@0:   // Somewhat smaller table to help with decoding ModR/M bytes
michael@0:   // when 32-bit addressing mode is being used.  Defined in
michael@0:   // Ia32ModrmMap.cpp
michael@0:   static const ModrmEntry s_ia32_modrm_map_[];
michael@0: 
michael@0:   // Indicators of whether we got certain prefixes that certain
michael@0:   // silly Intel instructions depend on in nonstandard ways for
michael@0:   // their behaviors.
michael@0:   bool got_f2_prefix_, got_f3_prefix_, got_66_prefix_;
michael@0: };
michael@0: 
michael@0: };  // namespace sidestep
michael@0: 
michael@0: #endif  // SANDBOX_SRC_SIDESTEP_MINI_DISASSEMBLER_H__