1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/security/sandbox/win/src/sidestep/mini_disassembler.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,395 @@ 1.4 +// Copyright (c) 2012 The Chromium Authors. All rights reserved. 1.5 +// Use of this source code is governed by a BSD-style license that can be 1.6 +// found in the LICENSE file. 1.7 + 1.8 +// Implementation of MiniDisassembler. 1.9 + 1.10 +#ifdef _WIN64 1.11 +#error The code in this file should not be used on 64-bit Windows. 1.12 +#endif 1.13 + 1.14 +#include "sandbox/win/src/sidestep/mini_disassembler.h" 1.15 + 1.16 +namespace sidestep { 1.17 + 1.18 +MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, 1.19 + bool address_default_is_32_bits) 1.20 + : operand_default_is_32_bits_(operand_default_is_32_bits), 1.21 + address_default_is_32_bits_(address_default_is_32_bits) { 1.22 + Initialize(); 1.23 +} 1.24 + 1.25 +MiniDisassembler::MiniDisassembler() 1.26 + : operand_default_is_32_bits_(true), 1.27 + address_default_is_32_bits_(true) { 1.28 + Initialize(); 1.29 +} 1.30 + 1.31 +InstructionType MiniDisassembler::Disassemble( 1.32 + unsigned char* start_byte, 1.33 + unsigned int* instruction_bytes) { 1.34 + // Clean up any state from previous invocations. 1.35 + Initialize(); 1.36 + 1.37 + // Start by processing any prefixes. 1.38 + unsigned char* current_byte = start_byte; 1.39 + unsigned int size = 0; 1.40 + InstructionType instruction_type = ProcessPrefixes(current_byte, &size); 1.41 + 1.42 + if (IT_UNKNOWN == instruction_type) 1.43 + return instruction_type; 1.44 + 1.45 + current_byte += size; 1.46 + size = 0; 1.47 + 1.48 + // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ 1.49 + // and address_is_32_bits_ flags are correctly set. 1.50 + 1.51 + instruction_type = ProcessOpcode(current_byte, 0, &size); 1.52 + 1.53 + // Check for error processing instruction 1.54 + if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { 1.55 + return IT_UNKNOWN; 1.56 + } 1.57 + 1.58 + current_byte += size; 1.59 + 1.60 + // Invariant: operand_bytes_ indicates the total size of operands 1.61 + // specified by the opcode and/or ModR/M byte and/or SIB byte. 1.62 + // pCurrentByte points to the first byte after the ModR/M byte, or after 1.63 + // the SIB byte if it is present (i.e. the first byte of any operands 1.64 + // encoded in the instruction). 1.65 + 1.66 + // We get the total length of any prefixes, the opcode, and the ModR/M and 1.67 + // SIB bytes if present, by taking the difference of the original starting 1.68 + // address and the current byte (which points to the first byte of the 1.69 + // operands if present, or to the first byte of the next instruction if 1.70 + // they are not). Adding the count of bytes in the operands encoded in 1.71 + // the instruction gives us the full length of the instruction in bytes. 1.72 + *instruction_bytes += operand_bytes_ + (current_byte - start_byte); 1.73 + 1.74 + // Return the instruction type, which was set by ProcessOpcode(). 1.75 + return instruction_type_; 1.76 +} 1.77 + 1.78 +void MiniDisassembler::Initialize() { 1.79 + operand_is_32_bits_ = operand_default_is_32_bits_; 1.80 + address_is_32_bits_ = address_default_is_32_bits_; 1.81 + operand_bytes_ = 0; 1.82 + have_modrm_ = false; 1.83 + should_decode_modrm_ = false; 1.84 + instruction_type_ = IT_UNKNOWN; 1.85 + got_f2_prefix_ = false; 1.86 + got_f3_prefix_ = false; 1.87 + got_66_prefix_ = false; 1.88 +} 1.89 + 1.90 +InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, 1.91 + unsigned int* size) { 1.92 + InstructionType instruction_type = IT_GENERIC; 1.93 + const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; 1.94 + 1.95 + switch (opcode.type_) { 1.96 + case IT_PREFIX_ADDRESS: 1.97 + address_is_32_bits_ = !address_default_is_32_bits_; 1.98 + goto nochangeoperand; 1.99 + case IT_PREFIX_OPERAND: 1.100 + operand_is_32_bits_ = !operand_default_is_32_bits_; 1.101 + nochangeoperand: 1.102 + case IT_PREFIX: 1.103 + 1.104 + if (0xF2 == (*start_byte)) 1.105 + got_f2_prefix_ = true; 1.106 + else if (0xF3 == (*start_byte)) 1.107 + got_f3_prefix_ = true; 1.108 + else if (0x66 == (*start_byte)) 1.109 + got_66_prefix_ = true; 1.110 + 1.111 + instruction_type = opcode.type_; 1.112 + (*size)++; 1.113 + // we got a prefix, so add one and check next byte 1.114 + ProcessPrefixes(start_byte + 1, size); 1.115 + default: 1.116 + break; // not a prefix byte 1.117 + } 1.118 + 1.119 + return instruction_type; 1.120 +} 1.121 + 1.122 +InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, 1.123 + unsigned int table_index, 1.124 + unsigned int* size) { 1.125 + const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table 1.126 + unsigned char current_byte = (*start_byte) >> table.shift_; 1.127 + current_byte = current_byte & table.mask_; // Mask out the bits we will use 1.128 + 1.129 + // Check whether the byte we have is inside the table we have. 1.130 + if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { 1.131 + instruction_type_ = IT_UNKNOWN; 1.132 + return instruction_type_; 1.133 + } 1.134 + 1.135 + const Opcode& opcode = table.table_[current_byte]; 1.136 + if (IT_UNUSED == opcode.type_) { 1.137 + // This instruction is not used by the IA-32 ISA, so we indicate 1.138 + // this to the user. Probably means that we were pointed to 1.139 + // a byte in memory that was not the start of an instruction. 1.140 + instruction_type_ = IT_UNUSED; 1.141 + return instruction_type_; 1.142 + } else if (IT_REFERENCE == opcode.type_) { 1.143 + // We are looking at an opcode that has more bytes (or is continued 1.144 + // in the ModR/M byte). Recursively find the opcode definition in 1.145 + // the table for the opcode's next byte. 1.146 + (*size)++; 1.147 + ProcessOpcode(start_byte + 1, opcode.table_index_, size); 1.148 + return instruction_type_; 1.149 + } 1.150 + 1.151 + const SpecificOpcode* specific_opcode = reinterpret_cast< 1.152 + const SpecificOpcode*>(&opcode); 1.153 + if (opcode.is_prefix_dependent_) { 1.154 + if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { 1.155 + specific_opcode = &opcode.opcode_if_f2_prefix_; 1.156 + } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { 1.157 + specific_opcode = &opcode.opcode_if_f3_prefix_; 1.158 + } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { 1.159 + specific_opcode = &opcode.opcode_if_66_prefix_; 1.160 + } 1.161 + } 1.162 + 1.163 + // Inv: The opcode type is known. 1.164 + instruction_type_ = specific_opcode->type_; 1.165 + 1.166 + // Let's process the operand types to see if we have any immediate 1.167 + // operands, and/or a ModR/M byte. 1.168 + 1.169 + ProcessOperand(specific_opcode->flag_dest_); 1.170 + ProcessOperand(specific_opcode->flag_source_); 1.171 + ProcessOperand(specific_opcode->flag_aux_); 1.172 + 1.173 + // Inv: We have processed the opcode and incremented operand_bytes_ 1.174 + // by the number of bytes of any operands specified by the opcode 1.175 + // that are stored in the instruction (not registers etc.). Now 1.176 + // we need to return the total number of bytes for the opcode and 1.177 + // for the ModR/M or SIB bytes if they are present. 1.178 + 1.179 + if (table.mask_ != 0xff) { 1.180 + if (have_modrm_) { 1.181 + // we're looking at a ModR/M byte so we're not going to 1.182 + // count that into the opcode size 1.183 + ProcessModrm(start_byte, size); 1.184 + return IT_GENERIC; 1.185 + } else { 1.186 + // need to count the ModR/M byte even if it's just being 1.187 + // used for opcode extension 1.188 + (*size)++; 1.189 + return IT_GENERIC; 1.190 + } 1.191 + } else { 1.192 + if (have_modrm_) { 1.193 + // The ModR/M byte is the next byte. 1.194 + (*size)++; 1.195 + ProcessModrm(start_byte + 1, size); 1.196 + return IT_GENERIC; 1.197 + } else { 1.198 + (*size)++; 1.199 + return IT_GENERIC; 1.200 + } 1.201 + } 1.202 +} 1.203 + 1.204 +bool MiniDisassembler::ProcessOperand(int flag_operand) { 1.205 + bool succeeded = true; 1.206 + if (AM_NOT_USED == flag_operand) 1.207 + return succeeded; 1.208 + 1.209 + // Decide what to do based on the addressing mode. 1.210 + switch (flag_operand & AM_MASK) { 1.211 + // No ModR/M byte indicated by these addressing modes, and no 1.212 + // additional (e.g. immediate) parameters. 1.213 + case AM_A: // Direct address 1.214 + case AM_F: // EFLAGS register 1.215 + case AM_X: // Memory addressed by the DS:SI register pair 1.216 + case AM_Y: // Memory addressed by the ES:DI register pair 1.217 + case AM_IMPLICIT: // Parameter is implicit, occupies no space in 1.218 + // instruction 1.219 + break; 1.220 + 1.221 + // There is a ModR/M byte but it does not necessarily need 1.222 + // to be decoded. 1.223 + case AM_C: // reg field of ModR/M selects a control register 1.224 + case AM_D: // reg field of ModR/M selects a debug register 1.225 + case AM_G: // reg field of ModR/M selects a general register 1.226 + case AM_P: // reg field of ModR/M selects an MMX register 1.227 + case AM_R: // mod field of ModR/M may refer only to a general register 1.228 + case AM_S: // reg field of ModR/M selects a segment register 1.229 + case AM_T: // reg field of ModR/M selects a test register 1.230 + case AM_V: // reg field of ModR/M selects a 128-bit XMM register 1.231 + have_modrm_ = true; 1.232 + break; 1.233 + 1.234 + // In these addressing modes, there is a ModR/M byte and it needs to be 1.235 + // decoded. No other (e.g. immediate) params than indicated in ModR/M. 1.236 + case AM_E: // Operand is either a general-purpose register or memory, 1.237 + // specified by ModR/M byte 1.238 + case AM_M: // ModR/M byte will refer only to memory 1.239 + case AM_Q: // Operand is either an MMX register or memory (complex 1.240 + // evaluation), specified by ModR/M byte 1.241 + case AM_W: // Operand is either a 128-bit XMM register or memory (complex 1.242 + // eval), specified by ModR/M byte 1.243 + have_modrm_ = true; 1.244 + should_decode_modrm_ = true; 1.245 + break; 1.246 + 1.247 + // These addressing modes specify an immediate or an offset value 1.248 + // directly, so we need to look at the operand type to see how many 1.249 + // bytes. 1.250 + case AM_I: // Immediate data. 1.251 + case AM_J: // Jump to offset. 1.252 + case AM_O: // Operand is at offset. 1.253 + switch (flag_operand & OT_MASK) { 1.254 + case OT_B: // Byte regardless of operand-size attribute. 1.255 + operand_bytes_ += OS_BYTE; 1.256 + break; 1.257 + case OT_C: // Byte or word, depending on operand-size attribute. 1.258 + if (operand_is_32_bits_) 1.259 + operand_bytes_ += OS_WORD; 1.260 + else 1.261 + operand_bytes_ += OS_BYTE; 1.262 + break; 1.263 + case OT_D: // Doubleword, regardless of operand-size attribute. 1.264 + operand_bytes_ += OS_DOUBLE_WORD; 1.265 + break; 1.266 + case OT_DQ: // Double-quadword, regardless of operand-size attribute. 1.267 + operand_bytes_ += OS_DOUBLE_QUAD_WORD; 1.268 + break; 1.269 + case OT_P: // 32-bit or 48-bit pointer, depending on operand-size 1.270 + // attribute. 1.271 + if (operand_is_32_bits_) 1.272 + operand_bytes_ += OS_48_BIT_POINTER; 1.273 + else 1.274 + operand_bytes_ += OS_32_BIT_POINTER; 1.275 + break; 1.276 + case OT_PS: // 128-bit packed single-precision floating-point data. 1.277 + operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; 1.278 + break; 1.279 + case OT_Q: // Quadword, regardless of operand-size attribute. 1.280 + operand_bytes_ += OS_QUAD_WORD; 1.281 + break; 1.282 + case OT_S: // 6-byte pseudo-descriptor. 1.283 + operand_bytes_ += OS_PSEUDO_DESCRIPTOR; 1.284 + break; 1.285 + case OT_SD: // Scalar Double-Precision Floating-Point Value 1.286 + case OT_PD: // Unaligned packed double-precision floating point value 1.287 + operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; 1.288 + break; 1.289 + case OT_SS: 1.290 + // Scalar element of a 128-bit packed single-precision 1.291 + // floating data. 1.292 + // We simply return enItUnknown since we don't have to support 1.293 + // floating point 1.294 + succeeded = false; 1.295 + break; 1.296 + case OT_V: // Word or doubleword, depending on operand-size attribute. 1.297 + if (operand_is_32_bits_) 1.298 + operand_bytes_ += OS_DOUBLE_WORD; 1.299 + else 1.300 + operand_bytes_ += OS_WORD; 1.301 + break; 1.302 + case OT_W: // Word, regardless of operand-size attribute. 1.303 + operand_bytes_ += OS_WORD; 1.304 + break; 1.305 + 1.306 + // Can safely ignore these. 1.307 + case OT_A: // Two one-word operands in memory or two double-word 1.308 + // operands in memory 1.309 + case OT_PI: // Quadword MMX technology register (e.g. mm0) 1.310 + case OT_SI: // Doubleword integer register (e.g., eax) 1.311 + break; 1.312 + 1.313 + default: 1.314 + break; 1.315 + } 1.316 + break; 1.317 + 1.318 + default: 1.319 + break; 1.320 + } 1.321 + 1.322 + return succeeded; 1.323 +} 1.324 + 1.325 +bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, 1.326 + unsigned int* size) { 1.327 + // If we don't need to decode, we just return the size of the ModR/M 1.328 + // byte (there is never a SIB byte in this case). 1.329 + if (!should_decode_modrm_) { 1.330 + (*size)++; 1.331 + return true; 1.332 + } 1.333 + 1.334 + // We never care about the reg field, only the combination of the mod 1.335 + // and r/m fields, so let's start by packing those fields together into 1.336 + // 5 bits. 1.337 + unsigned char modrm = (*start_byte); 1.338 + unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field 1.339 + modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field 1.340 + mod = mod >> 3; // shift the mod field to the right place 1.341 + modrm = mod | modrm; // combine the r/m and mod fields as discussed 1.342 + mod = mod >> 3; // shift the mod field to bits 2..0 1.343 + 1.344 + // Invariant: modrm contains the mod field in bits 4..3 and the r/m field 1.345 + // in bits 2..0, and mod contains the mod field in bits 2..0 1.346 + 1.347 + const ModrmEntry* modrm_entry = 0; 1.348 + if (address_is_32_bits_) 1.349 + modrm_entry = &s_ia32_modrm_map_[modrm]; 1.350 + else 1.351 + modrm_entry = &s_ia16_modrm_map_[modrm]; 1.352 + 1.353 + // Invariant: modrm_entry points to information that we need to decode 1.354 + // the ModR/M byte. 1.355 + 1.356 + // Add to the count of operand bytes, if the ModR/M byte indicates 1.357 + // that some operands are encoded in the instruction. 1.358 + if (modrm_entry->is_encoded_in_instruction_) 1.359 + operand_bytes_ += modrm_entry->operand_size_; 1.360 + 1.361 + // Process the SIB byte if necessary, and return the count 1.362 + // of ModR/M and SIB bytes. 1.363 + if (modrm_entry->use_sib_byte_) { 1.364 + (*size)++; 1.365 + return ProcessSib(start_byte + 1, mod, size); 1.366 + } else { 1.367 + (*size)++; 1.368 + return true; 1.369 + } 1.370 +} 1.371 + 1.372 +bool MiniDisassembler::ProcessSib(unsigned char* start_byte, 1.373 + unsigned char mod, 1.374 + unsigned int* size) { 1.375 + // get the mod field from the 2..0 bits of the SIB byte 1.376 + unsigned char sib_base = (*start_byte) & 0x07; 1.377 + if (0x05 == sib_base) { 1.378 + switch (mod) { 1.379 + case 0x00: // mod == 00 1.380 + case 0x02: // mod == 10 1.381 + operand_bytes_ += OS_DOUBLE_WORD; 1.382 + break; 1.383 + case 0x01: // mod == 01 1.384 + operand_bytes_ += OS_BYTE; 1.385 + break; 1.386 + case 0x03: // mod == 11 1.387 + // According to the IA-32 docs, there does not seem to be a disp 1.388 + // value for this value of mod 1.389 + default: 1.390 + break; 1.391 + } 1.392 + } 1.393 + 1.394 + (*size)++; 1.395 + return true; 1.396 +} 1.397 + 1.398 +}; // namespace sidestep