michael@0: // Copyright (c) 2012 The Chromium Authors. All rights reserved. michael@0: // Use of this source code is governed by a BSD-style license that can be michael@0: // found in the LICENSE file. michael@0: michael@0: // Implementation of MiniDisassembler. michael@0: michael@0: #ifdef _WIN64 michael@0: #error The code in this file should not be used on 64-bit Windows. michael@0: #endif michael@0: michael@0: #include "sandbox/win/src/sidestep/mini_disassembler.h" michael@0: michael@0: namespace sidestep { michael@0: michael@0: MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, michael@0: bool address_default_is_32_bits) michael@0: : operand_default_is_32_bits_(operand_default_is_32_bits), michael@0: address_default_is_32_bits_(address_default_is_32_bits) { michael@0: Initialize(); michael@0: } michael@0: michael@0: MiniDisassembler::MiniDisassembler() michael@0: : operand_default_is_32_bits_(true), michael@0: address_default_is_32_bits_(true) { michael@0: Initialize(); michael@0: } michael@0: michael@0: InstructionType MiniDisassembler::Disassemble( michael@0: unsigned char* start_byte, michael@0: unsigned int* instruction_bytes) { michael@0: // Clean up any state from previous invocations. michael@0: Initialize(); michael@0: michael@0: // Start by processing any prefixes. michael@0: unsigned char* current_byte = start_byte; michael@0: unsigned int size = 0; michael@0: InstructionType instruction_type = ProcessPrefixes(current_byte, &size); michael@0: michael@0: if (IT_UNKNOWN == instruction_type) michael@0: return instruction_type; michael@0: michael@0: current_byte += size; michael@0: size = 0; michael@0: michael@0: // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ michael@0: // and address_is_32_bits_ flags are correctly set. michael@0: michael@0: instruction_type = ProcessOpcode(current_byte, 0, &size); michael@0: michael@0: // Check for error processing instruction michael@0: if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { michael@0: return IT_UNKNOWN; michael@0: } michael@0: michael@0: current_byte += size; michael@0: michael@0: // Invariant: operand_bytes_ indicates the total size of operands michael@0: // specified by the opcode and/or ModR/M byte and/or SIB byte. michael@0: // pCurrentByte points to the first byte after the ModR/M byte, or after michael@0: // the SIB byte if it is present (i.e. the first byte of any operands michael@0: // encoded in the instruction). michael@0: michael@0: // We get the total length of any prefixes, the opcode, and the ModR/M and michael@0: // SIB bytes if present, by taking the difference of the original starting michael@0: // address and the current byte (which points to the first byte of the michael@0: // operands if present, or to the first byte of the next instruction if michael@0: // they are not). Adding the count of bytes in the operands encoded in michael@0: // the instruction gives us the full length of the instruction in bytes. michael@0: *instruction_bytes += operand_bytes_ + (current_byte - start_byte); michael@0: michael@0: // Return the instruction type, which was set by ProcessOpcode(). michael@0: return instruction_type_; michael@0: } michael@0: michael@0: void MiniDisassembler::Initialize() { michael@0: operand_is_32_bits_ = operand_default_is_32_bits_; michael@0: address_is_32_bits_ = address_default_is_32_bits_; michael@0: operand_bytes_ = 0; michael@0: have_modrm_ = false; michael@0: should_decode_modrm_ = false; michael@0: instruction_type_ = IT_UNKNOWN; michael@0: got_f2_prefix_ = false; michael@0: got_f3_prefix_ = false; michael@0: got_66_prefix_ = false; michael@0: } michael@0: michael@0: InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, michael@0: unsigned int* size) { michael@0: InstructionType instruction_type = IT_GENERIC; michael@0: const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; michael@0: michael@0: switch (opcode.type_) { michael@0: case IT_PREFIX_ADDRESS: michael@0: address_is_32_bits_ = !address_default_is_32_bits_; michael@0: goto nochangeoperand; michael@0: case IT_PREFIX_OPERAND: michael@0: operand_is_32_bits_ = !operand_default_is_32_bits_; michael@0: nochangeoperand: michael@0: case IT_PREFIX: michael@0: michael@0: if (0xF2 == (*start_byte)) michael@0: got_f2_prefix_ = true; michael@0: else if (0xF3 == (*start_byte)) michael@0: got_f3_prefix_ = true; michael@0: else if (0x66 == (*start_byte)) michael@0: got_66_prefix_ = true; michael@0: michael@0: instruction_type = opcode.type_; michael@0: (*size)++; michael@0: // we got a prefix, so add one and check next byte michael@0: ProcessPrefixes(start_byte + 1, size); michael@0: default: michael@0: break; // not a prefix byte michael@0: } michael@0: michael@0: return instruction_type; michael@0: } michael@0: michael@0: InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, michael@0: unsigned int table_index, michael@0: unsigned int* size) { michael@0: const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table michael@0: unsigned char current_byte = (*start_byte) >> table.shift_; michael@0: current_byte = current_byte & table.mask_; // Mask out the bits we will use michael@0: michael@0: // Check whether the byte we have is inside the table we have. michael@0: if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { michael@0: instruction_type_ = IT_UNKNOWN; michael@0: return instruction_type_; michael@0: } michael@0: michael@0: const Opcode& opcode = table.table_[current_byte]; michael@0: if (IT_UNUSED == opcode.type_) { michael@0: // This instruction is not used by the IA-32 ISA, so we indicate michael@0: // this to the user. Probably means that we were pointed to michael@0: // a byte in memory that was not the start of an instruction. michael@0: instruction_type_ = IT_UNUSED; michael@0: return instruction_type_; michael@0: } else if (IT_REFERENCE == opcode.type_) { michael@0: // We are looking at an opcode that has more bytes (or is continued michael@0: // in the ModR/M byte). Recursively find the opcode definition in michael@0: // the table for the opcode's next byte. michael@0: (*size)++; michael@0: ProcessOpcode(start_byte + 1, opcode.table_index_, size); michael@0: return instruction_type_; michael@0: } michael@0: michael@0: const SpecificOpcode* specific_opcode = reinterpret_cast< michael@0: const SpecificOpcode*>(&opcode); michael@0: if (opcode.is_prefix_dependent_) { michael@0: if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { michael@0: specific_opcode = &opcode.opcode_if_f2_prefix_; michael@0: } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { michael@0: specific_opcode = &opcode.opcode_if_f3_prefix_; michael@0: } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { michael@0: specific_opcode = &opcode.opcode_if_66_prefix_; michael@0: } michael@0: } michael@0: michael@0: // Inv: The opcode type is known. michael@0: instruction_type_ = specific_opcode->type_; michael@0: michael@0: // Let's process the operand types to see if we have any immediate michael@0: // operands, and/or a ModR/M byte. michael@0: michael@0: ProcessOperand(specific_opcode->flag_dest_); michael@0: ProcessOperand(specific_opcode->flag_source_); michael@0: ProcessOperand(specific_opcode->flag_aux_); michael@0: michael@0: // Inv: We have processed the opcode and incremented operand_bytes_ michael@0: // by the number of bytes of any operands specified by the opcode michael@0: // that are stored in the instruction (not registers etc.). Now michael@0: // we need to return the total number of bytes for the opcode and michael@0: // for the ModR/M or SIB bytes if they are present. michael@0: michael@0: if (table.mask_ != 0xff) { michael@0: if (have_modrm_) { michael@0: // we're looking at a ModR/M byte so we're not going to michael@0: // count that into the opcode size michael@0: ProcessModrm(start_byte, size); michael@0: return IT_GENERIC; michael@0: } else { michael@0: // need to count the ModR/M byte even if it's just being michael@0: // used for opcode extension michael@0: (*size)++; michael@0: return IT_GENERIC; michael@0: } michael@0: } else { michael@0: if (have_modrm_) { michael@0: // The ModR/M byte is the next byte. michael@0: (*size)++; michael@0: ProcessModrm(start_byte + 1, size); michael@0: return IT_GENERIC; michael@0: } else { michael@0: (*size)++; michael@0: return IT_GENERIC; michael@0: } michael@0: } michael@0: } michael@0: michael@0: bool MiniDisassembler::ProcessOperand(int flag_operand) { michael@0: bool succeeded = true; michael@0: if (AM_NOT_USED == flag_operand) michael@0: return succeeded; michael@0: michael@0: // Decide what to do based on the addressing mode. michael@0: switch (flag_operand & AM_MASK) { michael@0: // No ModR/M byte indicated by these addressing modes, and no michael@0: // additional (e.g. immediate) parameters. michael@0: case AM_A: // Direct address michael@0: case AM_F: // EFLAGS register michael@0: case AM_X: // Memory addressed by the DS:SI register pair michael@0: case AM_Y: // Memory addressed by the ES:DI register pair michael@0: case AM_IMPLICIT: // Parameter is implicit, occupies no space in michael@0: // instruction michael@0: break; michael@0: michael@0: // There is a ModR/M byte but it does not necessarily need michael@0: // to be decoded. michael@0: case AM_C: // reg field of ModR/M selects a control register michael@0: case AM_D: // reg field of ModR/M selects a debug register michael@0: case AM_G: // reg field of ModR/M selects a general register michael@0: case AM_P: // reg field of ModR/M selects an MMX register michael@0: case AM_R: // mod field of ModR/M may refer only to a general register michael@0: case AM_S: // reg field of ModR/M selects a segment register michael@0: case AM_T: // reg field of ModR/M selects a test register michael@0: case AM_V: // reg field of ModR/M selects a 128-bit XMM register michael@0: have_modrm_ = true; michael@0: break; michael@0: michael@0: // In these addressing modes, there is a ModR/M byte and it needs to be michael@0: // decoded. No other (e.g. immediate) params than indicated in ModR/M. michael@0: case AM_E: // Operand is either a general-purpose register or memory, michael@0: // specified by ModR/M byte michael@0: case AM_M: // ModR/M byte will refer only to memory michael@0: case AM_Q: // Operand is either an MMX register or memory (complex michael@0: // evaluation), specified by ModR/M byte michael@0: case AM_W: // Operand is either a 128-bit XMM register or memory (complex michael@0: // eval), specified by ModR/M byte michael@0: have_modrm_ = true; michael@0: should_decode_modrm_ = true; michael@0: break; michael@0: michael@0: // These addressing modes specify an immediate or an offset value michael@0: // directly, so we need to look at the operand type to see how many michael@0: // bytes. michael@0: case AM_I: // Immediate data. michael@0: case AM_J: // Jump to offset. michael@0: case AM_O: // Operand is at offset. michael@0: switch (flag_operand & OT_MASK) { michael@0: case OT_B: // Byte regardless of operand-size attribute. michael@0: operand_bytes_ += OS_BYTE; michael@0: break; michael@0: case OT_C: // Byte or word, depending on operand-size attribute. michael@0: if (operand_is_32_bits_) michael@0: operand_bytes_ += OS_WORD; michael@0: else michael@0: operand_bytes_ += OS_BYTE; michael@0: break; michael@0: case OT_D: // Doubleword, regardless of operand-size attribute. michael@0: operand_bytes_ += OS_DOUBLE_WORD; michael@0: break; michael@0: case OT_DQ: // Double-quadword, regardless of operand-size attribute. michael@0: operand_bytes_ += OS_DOUBLE_QUAD_WORD; michael@0: break; michael@0: case OT_P: // 32-bit or 48-bit pointer, depending on operand-size michael@0: // attribute. michael@0: if (operand_is_32_bits_) michael@0: operand_bytes_ += OS_48_BIT_POINTER; michael@0: else michael@0: operand_bytes_ += OS_32_BIT_POINTER; michael@0: break; michael@0: case OT_PS: // 128-bit packed single-precision floating-point data. michael@0: operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; michael@0: break; michael@0: case OT_Q: // Quadword, regardless of operand-size attribute. michael@0: operand_bytes_ += OS_QUAD_WORD; michael@0: break; michael@0: case OT_S: // 6-byte pseudo-descriptor. michael@0: operand_bytes_ += OS_PSEUDO_DESCRIPTOR; michael@0: break; michael@0: case OT_SD: // Scalar Double-Precision Floating-Point Value michael@0: case OT_PD: // Unaligned packed double-precision floating point value michael@0: operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; michael@0: break; michael@0: case OT_SS: michael@0: // Scalar element of a 128-bit packed single-precision michael@0: // floating data. michael@0: // We simply return enItUnknown since we don't have to support michael@0: // floating point michael@0: succeeded = false; michael@0: break; michael@0: case OT_V: // Word or doubleword, depending on operand-size attribute. michael@0: if (operand_is_32_bits_) michael@0: operand_bytes_ += OS_DOUBLE_WORD; michael@0: else michael@0: operand_bytes_ += OS_WORD; michael@0: break; michael@0: case OT_W: // Word, regardless of operand-size attribute. michael@0: operand_bytes_ += OS_WORD; michael@0: break; michael@0: michael@0: // Can safely ignore these. michael@0: case OT_A: // Two one-word operands in memory or two double-word michael@0: // operands in memory michael@0: case OT_PI: // Quadword MMX technology register (e.g. mm0) michael@0: case OT_SI: // Doubleword integer register (e.g., eax) michael@0: break; michael@0: michael@0: default: michael@0: break; michael@0: } michael@0: break; michael@0: michael@0: default: michael@0: break; michael@0: } michael@0: michael@0: return succeeded; michael@0: } michael@0: michael@0: bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, michael@0: unsigned int* size) { michael@0: // If we don't need to decode, we just return the size of the ModR/M michael@0: // byte (there is never a SIB byte in this case). michael@0: if (!should_decode_modrm_) { michael@0: (*size)++; michael@0: return true; michael@0: } michael@0: michael@0: // We never care about the reg field, only the combination of the mod michael@0: // and r/m fields, so let's start by packing those fields together into michael@0: // 5 bits. michael@0: unsigned char modrm = (*start_byte); michael@0: unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field michael@0: modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field michael@0: mod = mod >> 3; // shift the mod field to the right place michael@0: modrm = mod | modrm; // combine the r/m and mod fields as discussed michael@0: mod = mod >> 3; // shift the mod field to bits 2..0 michael@0: michael@0: // Invariant: modrm contains the mod field in bits 4..3 and the r/m field michael@0: // in bits 2..0, and mod contains the mod field in bits 2..0 michael@0: michael@0: const ModrmEntry* modrm_entry = 0; michael@0: if (address_is_32_bits_) michael@0: modrm_entry = &s_ia32_modrm_map_[modrm]; michael@0: else michael@0: modrm_entry = &s_ia16_modrm_map_[modrm]; michael@0: michael@0: // Invariant: modrm_entry points to information that we need to decode michael@0: // the ModR/M byte. michael@0: michael@0: // Add to the count of operand bytes, if the ModR/M byte indicates michael@0: // that some operands are encoded in the instruction. michael@0: if (modrm_entry->is_encoded_in_instruction_) michael@0: operand_bytes_ += modrm_entry->operand_size_; michael@0: michael@0: // Process the SIB byte if necessary, and return the count michael@0: // of ModR/M and SIB bytes. michael@0: if (modrm_entry->use_sib_byte_) { michael@0: (*size)++; michael@0: return ProcessSib(start_byte + 1, mod, size); michael@0: } else { michael@0: (*size)++; michael@0: return true; michael@0: } michael@0: } michael@0: michael@0: bool MiniDisassembler::ProcessSib(unsigned char* start_byte, michael@0: unsigned char mod, michael@0: unsigned int* size) { michael@0: // get the mod field from the 2..0 bits of the SIB byte michael@0: unsigned char sib_base = (*start_byte) & 0x07; michael@0: if (0x05 == sib_base) { michael@0: switch (mod) { michael@0: case 0x00: // mod == 00 michael@0: case 0x02: // mod == 10 michael@0: operand_bytes_ += OS_DOUBLE_WORD; michael@0: break; michael@0: case 0x01: // mod == 01 michael@0: operand_bytes_ += OS_BYTE; michael@0: break; michael@0: case 0x03: // mod == 11 michael@0: // According to the IA-32 docs, there does not seem to be a disp michael@0: // value for this value of mod michael@0: default: michael@0: break; michael@0: } michael@0: } michael@0: michael@0: (*size)++; michael@0: return true; michael@0: } michael@0: michael@0: }; // namespace sidestep