security/sandbox/win/src/sidestep/mini_disassembler.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.

michael@0 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
michael@0 2 // Use of this source code is governed by a BSD-style license that can be
michael@0 3 // found in the LICENSE file.
michael@0 4
michael@0 5 // Implementation of MiniDisassembler.
michael@0 6
michael@0 7 #ifdef _WIN64
michael@0 8 #error The code in this file should not be used on 64-bit Windows.
michael@0 9 #endif
michael@0 10
michael@0 11 #include "sandbox/win/src/sidestep/mini_disassembler.h"
michael@0 12
michael@0 13 namespace sidestep {
michael@0 14
michael@0 15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
michael@0 16 bool address_default_is_32_bits)
michael@0 17 : operand_default_is_32_bits_(operand_default_is_32_bits),
michael@0 18 address_default_is_32_bits_(address_default_is_32_bits) {
michael@0 19 Initialize();
michael@0 20 }
michael@0 21
michael@0 22 MiniDisassembler::MiniDisassembler()
michael@0 23 : operand_default_is_32_bits_(true),
michael@0 24 address_default_is_32_bits_(true) {
michael@0 25 Initialize();
michael@0 26 }
michael@0 27
michael@0 28 InstructionType MiniDisassembler::Disassemble(
michael@0 29 unsigned char* start_byte,
michael@0 30 unsigned int* instruction_bytes) {
michael@0 31 // Clean up any state from previous invocations.
michael@0 32 Initialize();
michael@0 33
michael@0 34 // Start by processing any prefixes.
michael@0 35 unsigned char* current_byte = start_byte;
michael@0 36 unsigned int size = 0;
michael@0 37 InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
michael@0 38
michael@0 39 if (IT_UNKNOWN == instruction_type)
michael@0 40 return instruction_type;
michael@0 41
michael@0 42 current_byte += size;
michael@0 43 size = 0;
michael@0 44
michael@0 45 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
michael@0 46 // and address_is_32_bits_ flags are correctly set.
michael@0 47
michael@0 48 instruction_type = ProcessOpcode(current_byte, 0, &size);
michael@0 49
michael@0 50 // Check for error processing instruction
michael@0 51 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
michael@0 52 return IT_UNKNOWN;
michael@0 53 }
michael@0 54
michael@0 55 current_byte += size;
michael@0 56
michael@0 57 // Invariant: operand_bytes_ indicates the total size of operands
michael@0 58 // specified by the opcode and/or ModR/M byte and/or SIB byte.
michael@0 59 // pCurrentByte points to the first byte after the ModR/M byte, or after
michael@0 60 // the SIB byte if it is present (i.e. the first byte of any operands
michael@0 61 // encoded in the instruction).
michael@0 62
michael@0 63 // We get the total length of any prefixes, the opcode, and the ModR/M and
michael@0 64 // SIB bytes if present, by taking the difference of the original starting
michael@0 65 // address and the current byte (which points to the first byte of the
michael@0 66 // operands if present, or to the first byte of the next instruction if
michael@0 67 // they are not). Adding the count of bytes in the operands encoded in
michael@0 68 // the instruction gives us the full length of the instruction in bytes.
michael@0 69 *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
michael@0 70
michael@0 71 // Return the instruction type, which was set by ProcessOpcode().
michael@0 72 return instruction_type_;
michael@0 73 }
michael@0 74
michael@0 75 void MiniDisassembler::Initialize() {
michael@0 76 operand_is_32_bits_ = operand_default_is_32_bits_;
michael@0 77 address_is_32_bits_ = address_default_is_32_bits_;
michael@0 78 operand_bytes_ = 0;
michael@0 79 have_modrm_ = false;
michael@0 80 should_decode_modrm_ = false;
michael@0 81 instruction_type_ = IT_UNKNOWN;
michael@0 82 got_f2_prefix_ = false;
michael@0 83 got_f3_prefix_ = false;
michael@0 84 got_66_prefix_ = false;
michael@0 85 }
michael@0 86
michael@0 87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
michael@0 88 unsigned int* size) {
michael@0 89 InstructionType instruction_type = IT_GENERIC;
michael@0 90 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
michael@0 91
michael@0 92 switch (opcode.type_) {
michael@0 93 case IT_PREFIX_ADDRESS:
michael@0 94 address_is_32_bits_ = !address_default_is_32_bits_;
michael@0 95 goto nochangeoperand;
michael@0 96 case IT_PREFIX_OPERAND:
michael@0 97 operand_is_32_bits_ = !operand_default_is_32_bits_;
michael@0 98 nochangeoperand:
michael@0 99 case IT_PREFIX:
michael@0 100
michael@0 101 if (0xF2 == (*start_byte))
michael@0 102 got_f2_prefix_ = true;
michael@0 103 else if (0xF3 == (*start_byte))
michael@0 104 got_f3_prefix_ = true;
michael@0 105 else if (0x66 == (*start_byte))
michael@0 106 got_66_prefix_ = true;
michael@0 107
michael@0 108 instruction_type = opcode.type_;
michael@0 109 (*size)++;
michael@0 110 // we got a prefix, so add one and check next byte
michael@0 111 ProcessPrefixes(start_byte + 1, size);
michael@0 112 default:
michael@0 113 break; // not a prefix byte
michael@0 114 }
michael@0 115
michael@0 116 return instruction_type;
michael@0 117 }
michael@0 118
michael@0 119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
michael@0 120 unsigned int table_index,
michael@0 121 unsigned int* size) {
michael@0 122 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table
michael@0 123 unsigned char current_byte = (*start_byte) >> table.shift_;
michael@0 124 current_byte = current_byte & table.mask_; // Mask out the bits we will use
michael@0 125
michael@0 126 // Check whether the byte we have is inside the table we have.
michael@0 127 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
michael@0 128 instruction_type_ = IT_UNKNOWN;
michael@0 129 return instruction_type_;
michael@0 130 }
michael@0 131
michael@0 132 const Opcode& opcode = table.table_[current_byte];
michael@0 133 if (IT_UNUSED == opcode.type_) {
michael@0 134 // This instruction is not used by the IA-32 ISA, so we indicate
michael@0 135 // this to the user. Probably means that we were pointed to
michael@0 136 // a byte in memory that was not the start of an instruction.
michael@0 137 instruction_type_ = IT_UNUSED;
michael@0 138 return instruction_type_;
michael@0 139 } else if (IT_REFERENCE == opcode.type_) {
michael@0 140 // We are looking at an opcode that has more bytes (or is continued
michael@0 141 // in the ModR/M byte). Recursively find the opcode definition in
michael@0 142 // the table for the opcode's next byte.
michael@0 143 (*size)++;
michael@0 144 ProcessOpcode(start_byte + 1, opcode.table_index_, size);
michael@0 145 return instruction_type_;
michael@0 146 }
michael@0 147
michael@0 148 const SpecificOpcode* specific_opcode = reinterpret_cast<
michael@0 149 const SpecificOpcode*>(&opcode);
michael@0 150 if (opcode.is_prefix_dependent_) {
michael@0 151 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
michael@0 152 specific_opcode = &opcode.opcode_if_f2_prefix_;
michael@0 153 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
michael@0 154 specific_opcode = &opcode.opcode_if_f3_prefix_;
michael@0 155 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
michael@0 156 specific_opcode = &opcode.opcode_if_66_prefix_;
michael@0 157 }
michael@0 158 }
michael@0 159
michael@0 160 // Inv: The opcode type is known.
michael@0 161 instruction_type_ = specific_opcode->type_;
michael@0 162
michael@0 163 // Let's process the operand types to see if we have any immediate
michael@0 164 // operands, and/or a ModR/M byte.
michael@0 165
michael@0 166 ProcessOperand(specific_opcode->flag_dest_);
michael@0 167 ProcessOperand(specific_opcode->flag_source_);
michael@0 168 ProcessOperand(specific_opcode->flag_aux_);
michael@0 169
michael@0 170 // Inv: We have processed the opcode and incremented operand_bytes_
michael@0 171 // by the number of bytes of any operands specified by the opcode
michael@0 172 // that are stored in the instruction (not registers etc.). Now
michael@0 173 // we need to return the total number of bytes for the opcode and
michael@0 174 // for the ModR/M or SIB bytes if they are present.
michael@0 175
michael@0 176 if (table.mask_ != 0xff) {
michael@0 177 if (have_modrm_) {
michael@0 178 // we're looking at a ModR/M byte so we're not going to
michael@0 179 // count that into the opcode size
michael@0 180 ProcessModrm(start_byte, size);
michael@0 181 return IT_GENERIC;
michael@0 182 } else {
michael@0 183 // need to count the ModR/M byte even if it's just being
michael@0 184 // used for opcode extension
michael@0 185 (*size)++;
michael@0 186 return IT_GENERIC;
michael@0 187 }
michael@0 188 } else {
michael@0 189 if (have_modrm_) {
michael@0 190 // The ModR/M byte is the next byte.
michael@0 191 (*size)++;
michael@0 192 ProcessModrm(start_byte + 1, size);
michael@0 193 return IT_GENERIC;
michael@0 194 } else {
michael@0 195 (*size)++;
michael@0 196 return IT_GENERIC;
michael@0 197 }
michael@0 198 }
michael@0 199 }
michael@0 200
michael@0 201 bool MiniDisassembler::ProcessOperand(int flag_operand) {
michael@0 202 bool succeeded = true;
michael@0 203 if (AM_NOT_USED == flag_operand)
michael@0 204 return succeeded;
michael@0 205
michael@0 206 // Decide what to do based on the addressing mode.
michael@0 207 switch (flag_operand & AM_MASK) {
michael@0 208 // No ModR/M byte indicated by these addressing modes, and no
michael@0 209 // additional (e.g. immediate) parameters.
michael@0 210 case AM_A: // Direct address
michael@0 211 case AM_F: // EFLAGS register
michael@0 212 case AM_X: // Memory addressed by the DS:SI register pair
michael@0 213 case AM_Y: // Memory addressed by the ES:DI register pair
michael@0 214 case AM_IMPLICIT: // Parameter is implicit, occupies no space in
michael@0 215 // instruction
michael@0 216 break;
michael@0 217
michael@0 218 // There is a ModR/M byte but it does not necessarily need
michael@0 219 // to be decoded.
michael@0 220 case AM_C: // reg field of ModR/M selects a control register
michael@0 221 case AM_D: // reg field of ModR/M selects a debug register
michael@0 222 case AM_G: // reg field of ModR/M selects a general register
michael@0 223 case AM_P: // reg field of ModR/M selects an MMX register
michael@0 224 case AM_R: // mod field of ModR/M may refer only to a general register
michael@0 225 case AM_S: // reg field of ModR/M selects a segment register
michael@0 226 case AM_T: // reg field of ModR/M selects a test register
michael@0 227 case AM_V: // reg field of ModR/M selects a 128-bit XMM register
michael@0 228 have_modrm_ = true;
michael@0 229 break;
michael@0 230
michael@0 231 // In these addressing modes, there is a ModR/M byte and it needs to be
michael@0 232 // decoded. No other (e.g. immediate) params than indicated in ModR/M.
michael@0 233 case AM_E: // Operand is either a general-purpose register or memory,
michael@0 234 // specified by ModR/M byte
michael@0 235 case AM_M: // ModR/M byte will refer only to memory
michael@0 236 case AM_Q: // Operand is either an MMX register or memory (complex
michael@0 237 // evaluation), specified by ModR/M byte
michael@0 238 case AM_W: // Operand is either a 128-bit XMM register or memory (complex
michael@0 239 // eval), specified by ModR/M byte
michael@0 240 have_modrm_ = true;
michael@0 241 should_decode_modrm_ = true;
michael@0 242 break;
michael@0 243
michael@0 244 // These addressing modes specify an immediate or an offset value
michael@0 245 // directly, so we need to look at the operand type to see how many
michael@0 246 // bytes.
michael@0 247 case AM_I: // Immediate data.
michael@0 248 case AM_J: // Jump to offset.
michael@0 249 case AM_O: // Operand is at offset.
michael@0 250 switch (flag_operand & OT_MASK) {
michael@0 251 case OT_B: // Byte regardless of operand-size attribute.
michael@0 252 operand_bytes_ += OS_BYTE;
michael@0 253 break;
michael@0 254 case OT_C: // Byte or word, depending on operand-size attribute.
michael@0 255 if (operand_is_32_bits_)
michael@0 256 operand_bytes_ += OS_WORD;
michael@0 257 else
michael@0 258 operand_bytes_ += OS_BYTE;
michael@0 259 break;
michael@0 260 case OT_D: // Doubleword, regardless of operand-size attribute.
michael@0 261 operand_bytes_ += OS_DOUBLE_WORD;
michael@0 262 break;
michael@0 263 case OT_DQ: // Double-quadword, regardless of operand-size attribute.
michael@0 264 operand_bytes_ += OS_DOUBLE_QUAD_WORD;
michael@0 265 break;
michael@0 266 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size
michael@0 267 // attribute.
michael@0 268 if (operand_is_32_bits_)
michael@0 269 operand_bytes_ += OS_48_BIT_POINTER;
michael@0 270 else
michael@0 271 operand_bytes_ += OS_32_BIT_POINTER;
michael@0 272 break;
michael@0 273 case OT_PS: // 128-bit packed single-precision floating-point data.
michael@0 274 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
michael@0 275 break;
michael@0 276 case OT_Q: // Quadword, regardless of operand-size attribute.
michael@0 277 operand_bytes_ += OS_QUAD_WORD;
michael@0 278 break;
michael@0 279 case OT_S: // 6-byte pseudo-descriptor.
michael@0 280 operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
michael@0 281 break;
michael@0 282 case OT_SD: // Scalar Double-Precision Floating-Point Value
michael@0 283 case OT_PD: // Unaligned packed double-precision floating point value
michael@0 284 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
michael@0 285 break;
michael@0 286 case OT_SS:
michael@0 287 // Scalar element of a 128-bit packed single-precision
michael@0 288 // floating data.
michael@0 289 // We simply return enItUnknown since we don't have to support
michael@0 290 // floating point
michael@0 291 succeeded = false;
michael@0 292 break;
michael@0 293 case OT_V: // Word or doubleword, depending on operand-size attribute.
michael@0 294 if (operand_is_32_bits_)
michael@0 295 operand_bytes_ += OS_DOUBLE_WORD;
michael@0 296 else
michael@0 297 operand_bytes_ += OS_WORD;
michael@0 298 break;
michael@0 299 case OT_W: // Word, regardless of operand-size attribute.
michael@0 300 operand_bytes_ += OS_WORD;
michael@0 301 break;
michael@0 302
michael@0 303 // Can safely ignore these.
michael@0 304 case OT_A: // Two one-word operands in memory or two double-word
michael@0 305 // operands in memory
michael@0 306 case OT_PI: // Quadword MMX technology register (e.g. mm0)
michael@0 307 case OT_SI: // Doubleword integer register (e.g., eax)
michael@0 308 break;
michael@0 309
michael@0 310 default:
michael@0 311 break;
michael@0 312 }
michael@0 313 break;
michael@0 314
michael@0 315 default:
michael@0 316 break;
michael@0 317 }
michael@0 318
michael@0 319 return succeeded;
michael@0 320 }
michael@0 321
michael@0 322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
michael@0 323 unsigned int* size) {
michael@0 324 // If we don't need to decode, we just return the size of the ModR/M
michael@0 325 // byte (there is never a SIB byte in this case).
michael@0 326 if (!should_decode_modrm_) {
michael@0 327 (*size)++;
michael@0 328 return true;
michael@0 329 }
michael@0 330
michael@0 331 // We never care about the reg field, only the combination of the mod
michael@0 332 // and r/m fields, so let's start by packing those fields together into
michael@0 333 // 5 bits.
michael@0 334 unsigned char modrm = (*start_byte);
michael@0 335 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field
michael@0 336 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field
michael@0 337 mod = mod >> 3; // shift the mod field to the right place
michael@0 338 modrm = mod | modrm; // combine the r/m and mod fields as discussed
michael@0 339 mod = mod >> 3; // shift the mod field to bits 2..0
michael@0 340
michael@0 341 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
michael@0 342 // in bits 2..0, and mod contains the mod field in bits 2..0
michael@0 343
michael@0 344 const ModrmEntry* modrm_entry = 0;
michael@0 345 if (address_is_32_bits_)
michael@0 346 modrm_entry = &s_ia32_modrm_map_[modrm];
michael@0 347 else
michael@0 348 modrm_entry = &s_ia16_modrm_map_[modrm];
michael@0 349
michael@0 350 // Invariant: modrm_entry points to information that we need to decode
michael@0 351 // the ModR/M byte.
michael@0 352
michael@0 353 // Add to the count of operand bytes, if the ModR/M byte indicates
michael@0 354 // that some operands are encoded in the instruction.
michael@0 355 if (modrm_entry->is_encoded_in_instruction_)
michael@0 356 operand_bytes_ += modrm_entry->operand_size_;
michael@0 357
michael@0 358 // Process the SIB byte if necessary, and return the count
michael@0 359 // of ModR/M and SIB bytes.
michael@0 360 if (modrm_entry->use_sib_byte_) {
michael@0 361 (*size)++;
michael@0 362 return ProcessSib(start_byte + 1, mod, size);
michael@0 363 } else {
michael@0 364 (*size)++;
michael@0 365 return true;
michael@0 366 }
michael@0 367 }
michael@0 368
michael@0 369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
michael@0 370 unsigned char mod,
michael@0 371 unsigned int* size) {
michael@0 372 // get the mod field from the 2..0 bits of the SIB byte
michael@0 373 unsigned char sib_base = (*start_byte) & 0x07;
michael@0 374 if (0x05 == sib_base) {
michael@0 375 switch (mod) {
michael@0 376 case 0x00: // mod == 00
michael@0 377 case 0x02: // mod == 10
michael@0 378 operand_bytes_ += OS_DOUBLE_WORD;
michael@0 379 break;
michael@0 380 case 0x01: // mod == 01
michael@0 381 operand_bytes_ += OS_BYTE;
michael@0 382 break;
michael@0 383 case 0x03: // mod == 11
michael@0 384 // According to the IA-32 docs, there does not seem to be a disp
michael@0 385 // value for this value of mod
michael@0 386 default:
michael@0 387 break;
michael@0 388 }
michael@0 389 }
michael@0 390
michael@0 391 (*size)++;
michael@0 392 return true;
michael@0 393 }
michael@0 394
michael@0 395 }; // namespace sidestep

mercurial