security/sandbox/win/src/sidestep/mini_disassembler.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/security/sandbox/win/src/sidestep/mini_disassembler.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,395 @@
     1.4 +// Copyright (c) 2012 The Chromium Authors. All rights reserved.
     1.5 +// Use of this source code is governed by a BSD-style license that can be
     1.6 +// found in the LICENSE file.
     1.7 +
     1.8 +// Implementation of MiniDisassembler.
     1.9 +
    1.10 +#ifdef _WIN64
    1.11 +#error The code in this file should not be used on 64-bit Windows.
    1.12 +#endif
    1.13 +
    1.14 +#include "sandbox/win/src/sidestep/mini_disassembler.h"
    1.15 +
    1.16 +namespace sidestep {
    1.17 +
    1.18 +MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
    1.19 +                                   bool address_default_is_32_bits)
    1.20 +    : operand_default_is_32_bits_(operand_default_is_32_bits),
    1.21 +      address_default_is_32_bits_(address_default_is_32_bits) {
    1.22 +  Initialize();
    1.23 +}
    1.24 +
    1.25 +MiniDisassembler::MiniDisassembler()
    1.26 +    : operand_default_is_32_bits_(true),
    1.27 +      address_default_is_32_bits_(true) {
    1.28 +  Initialize();
    1.29 +}
    1.30 +
    1.31 +InstructionType MiniDisassembler::Disassemble(
    1.32 +    unsigned char* start_byte,
    1.33 +    unsigned int* instruction_bytes) {
    1.34 +  // Clean up any state from previous invocations.
    1.35 +  Initialize();
    1.36 +
    1.37 +  // Start by processing any prefixes.
    1.38 +  unsigned char* current_byte = start_byte;
    1.39 +  unsigned int size = 0;
    1.40 +  InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
    1.41 +
    1.42 +  if (IT_UNKNOWN == instruction_type)
    1.43 +    return instruction_type;
    1.44 +
    1.45 +  current_byte += size;
    1.46 +  size = 0;
    1.47 +
    1.48 +  // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
    1.49 +  // and address_is_32_bits_ flags are correctly set.
    1.50 +
    1.51 +  instruction_type = ProcessOpcode(current_byte, 0, &size);
    1.52 +
    1.53 +  // Check for error processing instruction
    1.54 +  if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
    1.55 +    return IT_UNKNOWN;
    1.56 +  }
    1.57 +
    1.58 +  current_byte += size;
    1.59 +
    1.60 +  // Invariant: operand_bytes_ indicates the total size of operands
    1.61 +  // specified by the opcode and/or ModR/M byte and/or SIB byte.
    1.62 +  // pCurrentByte points to the first byte after the ModR/M byte, or after
    1.63 +  // the SIB byte if it is present (i.e. the first byte of any operands
    1.64 +  // encoded in the instruction).
    1.65 +
    1.66 +  // We get the total length of any prefixes, the opcode, and the ModR/M and
    1.67 +  // SIB bytes if present, by taking the difference of the original starting
    1.68 +  // address and the current byte (which points to the first byte of the
    1.69 +  // operands if present, or to the first byte of the next instruction if
    1.70 +  // they are not).  Adding the count of bytes in the operands encoded in
    1.71 +  // the instruction gives us the full length of the instruction in bytes.
    1.72 +  *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
    1.73 +
    1.74 +  // Return the instruction type, which was set by ProcessOpcode().
    1.75 +  return instruction_type_;
    1.76 +}
    1.77 +
    1.78 +void MiniDisassembler::Initialize() {
    1.79 +  operand_is_32_bits_ = operand_default_is_32_bits_;
    1.80 +  address_is_32_bits_ = address_default_is_32_bits_;
    1.81 +  operand_bytes_ = 0;
    1.82 +  have_modrm_ = false;
    1.83 +  should_decode_modrm_ = false;
    1.84 +  instruction_type_ = IT_UNKNOWN;
    1.85 +  got_f2_prefix_ = false;
    1.86 +  got_f3_prefix_ = false;
    1.87 +  got_66_prefix_ = false;
    1.88 +}
    1.89 +
    1.90 +InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
    1.91 +                                                  unsigned int* size) {
    1.92 +  InstructionType instruction_type = IT_GENERIC;
    1.93 +  const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
    1.94 +
    1.95 +  switch (opcode.type_) {
    1.96 +    case IT_PREFIX_ADDRESS:
    1.97 +      address_is_32_bits_ = !address_default_is_32_bits_;
    1.98 +      goto nochangeoperand;
    1.99 +    case IT_PREFIX_OPERAND:
   1.100 +      operand_is_32_bits_ = !operand_default_is_32_bits_;
   1.101 +      nochangeoperand:
   1.102 +    case IT_PREFIX:
   1.103 +
   1.104 +      if (0xF2 == (*start_byte))
   1.105 +        got_f2_prefix_ = true;
   1.106 +      else if (0xF3 == (*start_byte))
   1.107 +        got_f3_prefix_ = true;
   1.108 +      else if (0x66 == (*start_byte))
   1.109 +        got_66_prefix_ = true;
   1.110 +
   1.111 +      instruction_type = opcode.type_;
   1.112 +      (*size)++;
   1.113 +      // we got a prefix, so add one and check next byte
   1.114 +      ProcessPrefixes(start_byte + 1, size);
   1.115 +    default:
   1.116 +      break;   // not a prefix byte
   1.117 +  }
   1.118 +
   1.119 +  return instruction_type;
   1.120 +}
   1.121 +
   1.122 +InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
   1.123 +                                                unsigned int table_index,
   1.124 +                                                unsigned int* size) {
   1.125 +  const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
   1.126 +  unsigned char current_byte = (*start_byte) >> table.shift_;
   1.127 +  current_byte = current_byte & table.mask_;  // Mask out the bits we will use
   1.128 +
   1.129 +  // Check whether the byte we have is inside the table we have.
   1.130 +  if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
   1.131 +    instruction_type_ = IT_UNKNOWN;
   1.132 +    return instruction_type_;
   1.133 +  }
   1.134 +
   1.135 +  const Opcode& opcode = table.table_[current_byte];
   1.136 +  if (IT_UNUSED == opcode.type_) {
   1.137 +    // This instruction is not used by the IA-32 ISA, so we indicate
   1.138 +    // this to the user.  Probably means that we were pointed to
   1.139 +    // a byte in memory that was not the start of an instruction.
   1.140 +    instruction_type_ = IT_UNUSED;
   1.141 +    return instruction_type_;
   1.142 +  } else if (IT_REFERENCE == opcode.type_) {
   1.143 +    // We are looking at an opcode that has more bytes (or is continued
   1.144 +    // in the ModR/M byte).  Recursively find the opcode definition in
   1.145 +    // the table for the opcode's next byte.
   1.146 +    (*size)++;
   1.147 +    ProcessOpcode(start_byte + 1, opcode.table_index_, size);
   1.148 +    return instruction_type_;
   1.149 +  }
   1.150 +
   1.151 +  const SpecificOpcode* specific_opcode = reinterpret_cast<
   1.152 +                                              const SpecificOpcode*>(&opcode);
   1.153 +  if (opcode.is_prefix_dependent_) {
   1.154 +    if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
   1.155 +      specific_opcode = &opcode.opcode_if_f2_prefix_;
   1.156 +    } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
   1.157 +      specific_opcode = &opcode.opcode_if_f3_prefix_;
   1.158 +    } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
   1.159 +      specific_opcode = &opcode.opcode_if_66_prefix_;
   1.160 +    }
   1.161 +  }
   1.162 +
   1.163 +  // Inv: The opcode type is known.
   1.164 +  instruction_type_ = specific_opcode->type_;
   1.165 +
   1.166 +  // Let's process the operand types to see if we have any immediate
   1.167 +  // operands, and/or a ModR/M byte.
   1.168 +
   1.169 +  ProcessOperand(specific_opcode->flag_dest_);
   1.170 +  ProcessOperand(specific_opcode->flag_source_);
   1.171 +  ProcessOperand(specific_opcode->flag_aux_);
   1.172 +
   1.173 +  // Inv: We have processed the opcode and incremented operand_bytes_
   1.174 +  // by the number of bytes of any operands specified by the opcode
   1.175 +  // that are stored in the instruction (not registers etc.).  Now
   1.176 +  // we need to return the total number of bytes for the opcode and
   1.177 +  // for the ModR/M or SIB bytes if they are present.
   1.178 +
   1.179 +  if (table.mask_ != 0xff) {
   1.180 +    if (have_modrm_) {
   1.181 +      // we're looking at a ModR/M byte so we're not going to
   1.182 +      // count that into the opcode size
   1.183 +      ProcessModrm(start_byte, size);
   1.184 +      return IT_GENERIC;
   1.185 +    } else {
   1.186 +      // need to count the ModR/M byte even if it's just being
   1.187 +      // used for opcode extension
   1.188 +      (*size)++;
   1.189 +      return IT_GENERIC;
   1.190 +    }
   1.191 +  } else {
   1.192 +    if (have_modrm_) {
   1.193 +      // The ModR/M byte is the next byte.
   1.194 +      (*size)++;
   1.195 +      ProcessModrm(start_byte + 1, size);
   1.196 +      return IT_GENERIC;
   1.197 +    } else {
   1.198 +      (*size)++;
   1.199 +      return IT_GENERIC;
   1.200 +    }
   1.201 +  }
   1.202 +}
   1.203 +
   1.204 +bool MiniDisassembler::ProcessOperand(int flag_operand) {
   1.205 +  bool succeeded = true;
   1.206 +  if (AM_NOT_USED == flag_operand)
   1.207 +    return succeeded;
   1.208 +
   1.209 +  // Decide what to do based on the addressing mode.
   1.210 +  switch (flag_operand & AM_MASK) {
   1.211 +    // No ModR/M byte indicated by these addressing modes, and no
   1.212 +    // additional (e.g. immediate) parameters.
   1.213 +    case AM_A:  // Direct address
   1.214 +    case AM_F:  // EFLAGS register
   1.215 +    case AM_X:  // Memory addressed by the DS:SI register pair
   1.216 +    case AM_Y:  // Memory addressed by the ES:DI register pair
   1.217 +    case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
   1.218 +                       // instruction
   1.219 +      break;
   1.220 +
   1.221 +    // There is a ModR/M byte but it does not necessarily need
   1.222 +    // to be decoded.
   1.223 +    case AM_C:  // reg field of ModR/M selects a control register
   1.224 +    case AM_D:  // reg field of ModR/M selects a debug register
   1.225 +    case AM_G:  // reg field of ModR/M selects a general register
   1.226 +    case AM_P:  // reg field of ModR/M selects an MMX register
   1.227 +    case AM_R:  // mod field of ModR/M may refer only to a general register
   1.228 +    case AM_S:  // reg field of ModR/M selects a segment register
   1.229 +    case AM_T:  // reg field of ModR/M selects a test register
   1.230 +    case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
   1.231 +      have_modrm_ = true;
   1.232 +      break;
   1.233 +
   1.234 +    // In these addressing modes, there is a ModR/M byte and it needs to be
   1.235 +    // decoded. No other (e.g. immediate) params than indicated in ModR/M.
   1.236 +    case AM_E:  // Operand is either a general-purpose register or memory,
   1.237 +                // specified by ModR/M byte
   1.238 +    case AM_M:  // ModR/M byte will refer only to memory
   1.239 +    case AM_Q:  // Operand is either an MMX register or memory (complex
   1.240 +                // evaluation), specified by ModR/M byte
   1.241 +    case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
   1.242 +                // eval), specified by ModR/M byte
   1.243 +      have_modrm_ = true;
   1.244 +      should_decode_modrm_ = true;
   1.245 +      break;
   1.246 +
   1.247 +    // These addressing modes specify an immediate or an offset value
   1.248 +    // directly, so we need to look at the operand type to see how many
   1.249 +    // bytes.
   1.250 +    case AM_I:  // Immediate data.
   1.251 +    case AM_J:  // Jump to offset.
   1.252 +    case AM_O:  // Operand is at offset.
   1.253 +      switch (flag_operand & OT_MASK) {
   1.254 +        case OT_B:  // Byte regardless of operand-size attribute.
   1.255 +          operand_bytes_ += OS_BYTE;
   1.256 +          break;
   1.257 +        case OT_C:  // Byte or word, depending on operand-size attribute.
   1.258 +          if (operand_is_32_bits_)
   1.259 +            operand_bytes_ += OS_WORD;
   1.260 +          else
   1.261 +            operand_bytes_ += OS_BYTE;
   1.262 +          break;
   1.263 +        case OT_D:  // Doubleword, regardless of operand-size attribute.
   1.264 +          operand_bytes_ += OS_DOUBLE_WORD;
   1.265 +          break;
   1.266 +        case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
   1.267 +          operand_bytes_ += OS_DOUBLE_QUAD_WORD;
   1.268 +          break;
   1.269 +        case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
   1.270 +                    // attribute.
   1.271 +          if (operand_is_32_bits_)
   1.272 +            operand_bytes_ += OS_48_BIT_POINTER;
   1.273 +          else
   1.274 +            operand_bytes_ += OS_32_BIT_POINTER;
   1.275 +          break;
   1.276 +        case OT_PS:  // 128-bit packed single-precision floating-point data.
   1.277 +          operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
   1.278 +          break;
   1.279 +        case OT_Q:  // Quadword, regardless of operand-size attribute.
   1.280 +          operand_bytes_ += OS_QUAD_WORD;
   1.281 +          break;
   1.282 +        case OT_S:  // 6-byte pseudo-descriptor.
   1.283 +          operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
   1.284 +          break;
   1.285 +        case OT_SD:  // Scalar Double-Precision Floating-Point Value
   1.286 +        case OT_PD:  // Unaligned packed double-precision floating point value
   1.287 +          operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
   1.288 +          break;
   1.289 +        case OT_SS:
   1.290 +          // Scalar element of a 128-bit packed single-precision
   1.291 +          // floating data.
   1.292 +          // We simply return enItUnknown since we don't have to support
   1.293 +          // floating point
   1.294 +          succeeded = false;
   1.295 +          break;
   1.296 +        case OT_V:  // Word or doubleword, depending on operand-size attribute.
   1.297 +          if (operand_is_32_bits_)
   1.298 +            operand_bytes_ += OS_DOUBLE_WORD;
   1.299 +          else
   1.300 +            operand_bytes_ += OS_WORD;
   1.301 +          break;
   1.302 +        case OT_W:  // Word, regardless of operand-size attribute.
   1.303 +          operand_bytes_ += OS_WORD;
   1.304 +          break;
   1.305 +
   1.306 +        // Can safely ignore these.
   1.307 +        case OT_A:  // Two one-word operands in memory or two double-word
   1.308 +                    // operands in memory
   1.309 +        case OT_PI:  // Quadword MMX technology register (e.g. mm0)
   1.310 +        case OT_SI:  // Doubleword integer register (e.g., eax)
   1.311 +          break;
   1.312 +
   1.313 +        default:
   1.314 +          break;
   1.315 +      }
   1.316 +      break;
   1.317 +
   1.318 +    default:
   1.319 +      break;
   1.320 +  }
   1.321 +
   1.322 +  return succeeded;
   1.323 +}
   1.324 +
   1.325 +bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
   1.326 +                                    unsigned int* size) {
   1.327 +  // If we don't need to decode, we just return the size of the ModR/M
   1.328 +  // byte (there is never a SIB byte in this case).
   1.329 +  if (!should_decode_modrm_) {
   1.330 +    (*size)++;
   1.331 +    return true;
   1.332 +  }
   1.333 +
   1.334 +  // We never care about the reg field, only the combination of the mod
   1.335 +  // and r/m fields, so let's start by packing those fields together into
   1.336 +  // 5 bits.
   1.337 +  unsigned char modrm = (*start_byte);
   1.338 +  unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
   1.339 +  modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
   1.340 +  mod = mod >> 3;  // shift the mod field to the right place
   1.341 +  modrm = mod | modrm;  // combine the r/m and mod fields as discussed
   1.342 +  mod = mod >> 3;  // shift the mod field to bits 2..0
   1.343 +
   1.344 +  // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
   1.345 +  // in bits 2..0, and mod contains the mod field in bits 2..0
   1.346 +
   1.347 +  const ModrmEntry* modrm_entry = 0;
   1.348 +  if (address_is_32_bits_)
   1.349 +    modrm_entry = &s_ia32_modrm_map_[modrm];
   1.350 +  else
   1.351 +    modrm_entry = &s_ia16_modrm_map_[modrm];
   1.352 +
   1.353 +  // Invariant: modrm_entry points to information that we need to decode
   1.354 +  // the ModR/M byte.
   1.355 +
   1.356 +  // Add to the count of operand bytes, if the ModR/M byte indicates
   1.357 +  // that some operands are encoded in the instruction.
   1.358 +  if (modrm_entry->is_encoded_in_instruction_)
   1.359 +    operand_bytes_ += modrm_entry->operand_size_;
   1.360 +
   1.361 +  // Process the SIB byte if necessary, and return the count
   1.362 +  // of ModR/M and SIB bytes.
   1.363 +  if (modrm_entry->use_sib_byte_) {
   1.364 +    (*size)++;
   1.365 +    return ProcessSib(start_byte + 1, mod, size);
   1.366 +  } else {
   1.367 +    (*size)++;
   1.368 +    return true;
   1.369 +  }
   1.370 +}
   1.371 +
   1.372 +bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
   1.373 +                                  unsigned char mod,
   1.374 +                                  unsigned int* size) {
   1.375 +  // get the mod field from the 2..0 bits of the SIB byte
   1.376 +  unsigned char sib_base = (*start_byte) & 0x07;
   1.377 +  if (0x05 == sib_base) {
   1.378 +    switch (mod) {
   1.379 +      case 0x00:  // mod == 00
   1.380 +      case 0x02:  // mod == 10
   1.381 +        operand_bytes_ += OS_DOUBLE_WORD;
   1.382 +        break;
   1.383 +      case 0x01:  // mod == 01
   1.384 +        operand_bytes_ += OS_BYTE;
   1.385 +        break;
   1.386 +      case 0x03:  // mod == 11
   1.387 +        // According to the IA-32 docs, there does not seem to be a disp
   1.388 +        // value for this value of mod
   1.389 +      default:
   1.390 +        break;
   1.391 +    }
   1.392 +  }
   1.393 +
   1.394 +  (*size)++;
   1.395 +  return true;
   1.396 +}
   1.397 +
   1.398 +};  // namespace sidestep

mercurial