Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
michael@0 | 1 | // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
michael@0 | 2 | // Use of this source code is governed by a BSD-style license that can be |
michael@0 | 3 | // found in the LICENSE file. |
michael@0 | 4 | |
michael@0 | 5 | // Implementation of MiniDisassembler. |
michael@0 | 6 | |
michael@0 | 7 | #ifdef _WIN64 |
michael@0 | 8 | #error The code in this file should not be used on 64-bit Windows. |
michael@0 | 9 | #endif |
michael@0 | 10 | |
michael@0 | 11 | #include "sandbox/win/src/sidestep/mini_disassembler.h" |
michael@0 | 12 | |
michael@0 | 13 | namespace sidestep { |
michael@0 | 14 | |
michael@0 | 15 | MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, |
michael@0 | 16 | bool address_default_is_32_bits) |
michael@0 | 17 | : operand_default_is_32_bits_(operand_default_is_32_bits), |
michael@0 | 18 | address_default_is_32_bits_(address_default_is_32_bits) { |
michael@0 | 19 | Initialize(); |
michael@0 | 20 | } |
michael@0 | 21 | |
michael@0 | 22 | MiniDisassembler::MiniDisassembler() |
michael@0 | 23 | : operand_default_is_32_bits_(true), |
michael@0 | 24 | address_default_is_32_bits_(true) { |
michael@0 | 25 | Initialize(); |
michael@0 | 26 | } |
michael@0 | 27 | |
michael@0 | 28 | InstructionType MiniDisassembler::Disassemble( |
michael@0 | 29 | unsigned char* start_byte, |
michael@0 | 30 | unsigned int* instruction_bytes) { |
michael@0 | 31 | // Clean up any state from previous invocations. |
michael@0 | 32 | Initialize(); |
michael@0 | 33 | |
michael@0 | 34 | // Start by processing any prefixes. |
michael@0 | 35 | unsigned char* current_byte = start_byte; |
michael@0 | 36 | unsigned int size = 0; |
michael@0 | 37 | InstructionType instruction_type = ProcessPrefixes(current_byte, &size); |
michael@0 | 38 | |
michael@0 | 39 | if (IT_UNKNOWN == instruction_type) |
michael@0 | 40 | return instruction_type; |
michael@0 | 41 | |
michael@0 | 42 | current_byte += size; |
michael@0 | 43 | size = 0; |
michael@0 | 44 | |
michael@0 | 45 | // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ |
michael@0 | 46 | // and address_is_32_bits_ flags are correctly set. |
michael@0 | 47 | |
michael@0 | 48 | instruction_type = ProcessOpcode(current_byte, 0, &size); |
michael@0 | 49 | |
michael@0 | 50 | // Check for error processing instruction |
michael@0 | 51 | if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { |
michael@0 | 52 | return IT_UNKNOWN; |
michael@0 | 53 | } |
michael@0 | 54 | |
michael@0 | 55 | current_byte += size; |
michael@0 | 56 | |
michael@0 | 57 | // Invariant: operand_bytes_ indicates the total size of operands |
michael@0 | 58 | // specified by the opcode and/or ModR/M byte and/or SIB byte. |
michael@0 | 59 | // pCurrentByte points to the first byte after the ModR/M byte, or after |
michael@0 | 60 | // the SIB byte if it is present (i.e. the first byte of any operands |
michael@0 | 61 | // encoded in the instruction). |
michael@0 | 62 | |
michael@0 | 63 | // We get the total length of any prefixes, the opcode, and the ModR/M and |
michael@0 | 64 | // SIB bytes if present, by taking the difference of the original starting |
michael@0 | 65 | // address and the current byte (which points to the first byte of the |
michael@0 | 66 | // operands if present, or to the first byte of the next instruction if |
michael@0 | 67 | // they are not). Adding the count of bytes in the operands encoded in |
michael@0 | 68 | // the instruction gives us the full length of the instruction in bytes. |
michael@0 | 69 | *instruction_bytes += operand_bytes_ + (current_byte - start_byte); |
michael@0 | 70 | |
michael@0 | 71 | // Return the instruction type, which was set by ProcessOpcode(). |
michael@0 | 72 | return instruction_type_; |
michael@0 | 73 | } |
michael@0 | 74 | |
michael@0 | 75 | void MiniDisassembler::Initialize() { |
michael@0 | 76 | operand_is_32_bits_ = operand_default_is_32_bits_; |
michael@0 | 77 | address_is_32_bits_ = address_default_is_32_bits_; |
michael@0 | 78 | operand_bytes_ = 0; |
michael@0 | 79 | have_modrm_ = false; |
michael@0 | 80 | should_decode_modrm_ = false; |
michael@0 | 81 | instruction_type_ = IT_UNKNOWN; |
michael@0 | 82 | got_f2_prefix_ = false; |
michael@0 | 83 | got_f3_prefix_ = false; |
michael@0 | 84 | got_66_prefix_ = false; |
michael@0 | 85 | } |
michael@0 | 86 | |
michael@0 | 87 | InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, |
michael@0 | 88 | unsigned int* size) { |
michael@0 | 89 | InstructionType instruction_type = IT_GENERIC; |
michael@0 | 90 | const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; |
michael@0 | 91 | |
michael@0 | 92 | switch (opcode.type_) { |
michael@0 | 93 | case IT_PREFIX_ADDRESS: |
michael@0 | 94 | address_is_32_bits_ = !address_default_is_32_bits_; |
michael@0 | 95 | goto nochangeoperand; |
michael@0 | 96 | case IT_PREFIX_OPERAND: |
michael@0 | 97 | operand_is_32_bits_ = !operand_default_is_32_bits_; |
michael@0 | 98 | nochangeoperand: |
michael@0 | 99 | case IT_PREFIX: |
michael@0 | 100 | |
michael@0 | 101 | if (0xF2 == (*start_byte)) |
michael@0 | 102 | got_f2_prefix_ = true; |
michael@0 | 103 | else if (0xF3 == (*start_byte)) |
michael@0 | 104 | got_f3_prefix_ = true; |
michael@0 | 105 | else if (0x66 == (*start_byte)) |
michael@0 | 106 | got_66_prefix_ = true; |
michael@0 | 107 | |
michael@0 | 108 | instruction_type = opcode.type_; |
michael@0 | 109 | (*size)++; |
michael@0 | 110 | // we got a prefix, so add one and check next byte |
michael@0 | 111 | ProcessPrefixes(start_byte + 1, size); |
michael@0 | 112 | default: |
michael@0 | 113 | break; // not a prefix byte |
michael@0 | 114 | } |
michael@0 | 115 | |
michael@0 | 116 | return instruction_type; |
michael@0 | 117 | } |
michael@0 | 118 | |
michael@0 | 119 | InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, |
michael@0 | 120 | unsigned int table_index, |
michael@0 | 121 | unsigned int* size) { |
michael@0 | 122 | const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table |
michael@0 | 123 | unsigned char current_byte = (*start_byte) >> table.shift_; |
michael@0 | 124 | current_byte = current_byte & table.mask_; // Mask out the bits we will use |
michael@0 | 125 | |
michael@0 | 126 | // Check whether the byte we have is inside the table we have. |
michael@0 | 127 | if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { |
michael@0 | 128 | instruction_type_ = IT_UNKNOWN; |
michael@0 | 129 | return instruction_type_; |
michael@0 | 130 | } |
michael@0 | 131 | |
michael@0 | 132 | const Opcode& opcode = table.table_[current_byte]; |
michael@0 | 133 | if (IT_UNUSED == opcode.type_) { |
michael@0 | 134 | // This instruction is not used by the IA-32 ISA, so we indicate |
michael@0 | 135 | // this to the user. Probably means that we were pointed to |
michael@0 | 136 | // a byte in memory that was not the start of an instruction. |
michael@0 | 137 | instruction_type_ = IT_UNUSED; |
michael@0 | 138 | return instruction_type_; |
michael@0 | 139 | } else if (IT_REFERENCE == opcode.type_) { |
michael@0 | 140 | // We are looking at an opcode that has more bytes (or is continued |
michael@0 | 141 | // in the ModR/M byte). Recursively find the opcode definition in |
michael@0 | 142 | // the table for the opcode's next byte. |
michael@0 | 143 | (*size)++; |
michael@0 | 144 | ProcessOpcode(start_byte + 1, opcode.table_index_, size); |
michael@0 | 145 | return instruction_type_; |
michael@0 | 146 | } |
michael@0 | 147 | |
michael@0 | 148 | const SpecificOpcode* specific_opcode = reinterpret_cast< |
michael@0 | 149 | const SpecificOpcode*>(&opcode); |
michael@0 | 150 | if (opcode.is_prefix_dependent_) { |
michael@0 | 151 | if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { |
michael@0 | 152 | specific_opcode = &opcode.opcode_if_f2_prefix_; |
michael@0 | 153 | } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { |
michael@0 | 154 | specific_opcode = &opcode.opcode_if_f3_prefix_; |
michael@0 | 155 | } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { |
michael@0 | 156 | specific_opcode = &opcode.opcode_if_66_prefix_; |
michael@0 | 157 | } |
michael@0 | 158 | } |
michael@0 | 159 | |
michael@0 | 160 | // Inv: The opcode type is known. |
michael@0 | 161 | instruction_type_ = specific_opcode->type_; |
michael@0 | 162 | |
michael@0 | 163 | // Let's process the operand types to see if we have any immediate |
michael@0 | 164 | // operands, and/or a ModR/M byte. |
michael@0 | 165 | |
michael@0 | 166 | ProcessOperand(specific_opcode->flag_dest_); |
michael@0 | 167 | ProcessOperand(specific_opcode->flag_source_); |
michael@0 | 168 | ProcessOperand(specific_opcode->flag_aux_); |
michael@0 | 169 | |
michael@0 | 170 | // Inv: We have processed the opcode and incremented operand_bytes_ |
michael@0 | 171 | // by the number of bytes of any operands specified by the opcode |
michael@0 | 172 | // that are stored in the instruction (not registers etc.). Now |
michael@0 | 173 | // we need to return the total number of bytes for the opcode and |
michael@0 | 174 | // for the ModR/M or SIB bytes if they are present. |
michael@0 | 175 | |
michael@0 | 176 | if (table.mask_ != 0xff) { |
michael@0 | 177 | if (have_modrm_) { |
michael@0 | 178 | // we're looking at a ModR/M byte so we're not going to |
michael@0 | 179 | // count that into the opcode size |
michael@0 | 180 | ProcessModrm(start_byte, size); |
michael@0 | 181 | return IT_GENERIC; |
michael@0 | 182 | } else { |
michael@0 | 183 | // need to count the ModR/M byte even if it's just being |
michael@0 | 184 | // used for opcode extension |
michael@0 | 185 | (*size)++; |
michael@0 | 186 | return IT_GENERIC; |
michael@0 | 187 | } |
michael@0 | 188 | } else { |
michael@0 | 189 | if (have_modrm_) { |
michael@0 | 190 | // The ModR/M byte is the next byte. |
michael@0 | 191 | (*size)++; |
michael@0 | 192 | ProcessModrm(start_byte + 1, size); |
michael@0 | 193 | return IT_GENERIC; |
michael@0 | 194 | } else { |
michael@0 | 195 | (*size)++; |
michael@0 | 196 | return IT_GENERIC; |
michael@0 | 197 | } |
michael@0 | 198 | } |
michael@0 | 199 | } |
michael@0 | 200 | |
michael@0 | 201 | bool MiniDisassembler::ProcessOperand(int flag_operand) { |
michael@0 | 202 | bool succeeded = true; |
michael@0 | 203 | if (AM_NOT_USED == flag_operand) |
michael@0 | 204 | return succeeded; |
michael@0 | 205 | |
michael@0 | 206 | // Decide what to do based on the addressing mode. |
michael@0 | 207 | switch (flag_operand & AM_MASK) { |
michael@0 | 208 | // No ModR/M byte indicated by these addressing modes, and no |
michael@0 | 209 | // additional (e.g. immediate) parameters. |
michael@0 | 210 | case AM_A: // Direct address |
michael@0 | 211 | case AM_F: // EFLAGS register |
michael@0 | 212 | case AM_X: // Memory addressed by the DS:SI register pair |
michael@0 | 213 | case AM_Y: // Memory addressed by the ES:DI register pair |
michael@0 | 214 | case AM_IMPLICIT: // Parameter is implicit, occupies no space in |
michael@0 | 215 | // instruction |
michael@0 | 216 | break; |
michael@0 | 217 | |
michael@0 | 218 | // There is a ModR/M byte but it does not necessarily need |
michael@0 | 219 | // to be decoded. |
michael@0 | 220 | case AM_C: // reg field of ModR/M selects a control register |
michael@0 | 221 | case AM_D: // reg field of ModR/M selects a debug register |
michael@0 | 222 | case AM_G: // reg field of ModR/M selects a general register |
michael@0 | 223 | case AM_P: // reg field of ModR/M selects an MMX register |
michael@0 | 224 | case AM_R: // mod field of ModR/M may refer only to a general register |
michael@0 | 225 | case AM_S: // reg field of ModR/M selects a segment register |
michael@0 | 226 | case AM_T: // reg field of ModR/M selects a test register |
michael@0 | 227 | case AM_V: // reg field of ModR/M selects a 128-bit XMM register |
michael@0 | 228 | have_modrm_ = true; |
michael@0 | 229 | break; |
michael@0 | 230 | |
michael@0 | 231 | // In these addressing modes, there is a ModR/M byte and it needs to be |
michael@0 | 232 | // decoded. No other (e.g. immediate) params than indicated in ModR/M. |
michael@0 | 233 | case AM_E: // Operand is either a general-purpose register or memory, |
michael@0 | 234 | // specified by ModR/M byte |
michael@0 | 235 | case AM_M: // ModR/M byte will refer only to memory |
michael@0 | 236 | case AM_Q: // Operand is either an MMX register or memory (complex |
michael@0 | 237 | // evaluation), specified by ModR/M byte |
michael@0 | 238 | case AM_W: // Operand is either a 128-bit XMM register or memory (complex |
michael@0 | 239 | // eval), specified by ModR/M byte |
michael@0 | 240 | have_modrm_ = true; |
michael@0 | 241 | should_decode_modrm_ = true; |
michael@0 | 242 | break; |
michael@0 | 243 | |
michael@0 | 244 | // These addressing modes specify an immediate or an offset value |
michael@0 | 245 | // directly, so we need to look at the operand type to see how many |
michael@0 | 246 | // bytes. |
michael@0 | 247 | case AM_I: // Immediate data. |
michael@0 | 248 | case AM_J: // Jump to offset. |
michael@0 | 249 | case AM_O: // Operand is at offset. |
michael@0 | 250 | switch (flag_operand & OT_MASK) { |
michael@0 | 251 | case OT_B: // Byte regardless of operand-size attribute. |
michael@0 | 252 | operand_bytes_ += OS_BYTE; |
michael@0 | 253 | break; |
michael@0 | 254 | case OT_C: // Byte or word, depending on operand-size attribute. |
michael@0 | 255 | if (operand_is_32_bits_) |
michael@0 | 256 | operand_bytes_ += OS_WORD; |
michael@0 | 257 | else |
michael@0 | 258 | operand_bytes_ += OS_BYTE; |
michael@0 | 259 | break; |
michael@0 | 260 | case OT_D: // Doubleword, regardless of operand-size attribute. |
michael@0 | 261 | operand_bytes_ += OS_DOUBLE_WORD; |
michael@0 | 262 | break; |
michael@0 | 263 | case OT_DQ: // Double-quadword, regardless of operand-size attribute. |
michael@0 | 264 | operand_bytes_ += OS_DOUBLE_QUAD_WORD; |
michael@0 | 265 | break; |
michael@0 | 266 | case OT_P: // 32-bit or 48-bit pointer, depending on operand-size |
michael@0 | 267 | // attribute. |
michael@0 | 268 | if (operand_is_32_bits_) |
michael@0 | 269 | operand_bytes_ += OS_48_BIT_POINTER; |
michael@0 | 270 | else |
michael@0 | 271 | operand_bytes_ += OS_32_BIT_POINTER; |
michael@0 | 272 | break; |
michael@0 | 273 | case OT_PS: // 128-bit packed single-precision floating-point data. |
michael@0 | 274 | operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; |
michael@0 | 275 | break; |
michael@0 | 276 | case OT_Q: // Quadword, regardless of operand-size attribute. |
michael@0 | 277 | operand_bytes_ += OS_QUAD_WORD; |
michael@0 | 278 | break; |
michael@0 | 279 | case OT_S: // 6-byte pseudo-descriptor. |
michael@0 | 280 | operand_bytes_ += OS_PSEUDO_DESCRIPTOR; |
michael@0 | 281 | break; |
michael@0 | 282 | case OT_SD: // Scalar Double-Precision Floating-Point Value |
michael@0 | 283 | case OT_PD: // Unaligned packed double-precision floating point value |
michael@0 | 284 | operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; |
michael@0 | 285 | break; |
michael@0 | 286 | case OT_SS: |
michael@0 | 287 | // Scalar element of a 128-bit packed single-precision |
michael@0 | 288 | // floating data. |
michael@0 | 289 | // We simply return enItUnknown since we don't have to support |
michael@0 | 290 | // floating point |
michael@0 | 291 | succeeded = false; |
michael@0 | 292 | break; |
michael@0 | 293 | case OT_V: // Word or doubleword, depending on operand-size attribute. |
michael@0 | 294 | if (operand_is_32_bits_) |
michael@0 | 295 | operand_bytes_ += OS_DOUBLE_WORD; |
michael@0 | 296 | else |
michael@0 | 297 | operand_bytes_ += OS_WORD; |
michael@0 | 298 | break; |
michael@0 | 299 | case OT_W: // Word, regardless of operand-size attribute. |
michael@0 | 300 | operand_bytes_ += OS_WORD; |
michael@0 | 301 | break; |
michael@0 | 302 | |
michael@0 | 303 | // Can safely ignore these. |
michael@0 | 304 | case OT_A: // Two one-word operands in memory or two double-word |
michael@0 | 305 | // operands in memory |
michael@0 | 306 | case OT_PI: // Quadword MMX technology register (e.g. mm0) |
michael@0 | 307 | case OT_SI: // Doubleword integer register (e.g., eax) |
michael@0 | 308 | break; |
michael@0 | 309 | |
michael@0 | 310 | default: |
michael@0 | 311 | break; |
michael@0 | 312 | } |
michael@0 | 313 | break; |
michael@0 | 314 | |
michael@0 | 315 | default: |
michael@0 | 316 | break; |
michael@0 | 317 | } |
michael@0 | 318 | |
michael@0 | 319 | return succeeded; |
michael@0 | 320 | } |
michael@0 | 321 | |
michael@0 | 322 | bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, |
michael@0 | 323 | unsigned int* size) { |
michael@0 | 324 | // If we don't need to decode, we just return the size of the ModR/M |
michael@0 | 325 | // byte (there is never a SIB byte in this case). |
michael@0 | 326 | if (!should_decode_modrm_) { |
michael@0 | 327 | (*size)++; |
michael@0 | 328 | return true; |
michael@0 | 329 | } |
michael@0 | 330 | |
michael@0 | 331 | // We never care about the reg field, only the combination of the mod |
michael@0 | 332 | // and r/m fields, so let's start by packing those fields together into |
michael@0 | 333 | // 5 bits. |
michael@0 | 334 | unsigned char modrm = (*start_byte); |
michael@0 | 335 | unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field |
michael@0 | 336 | modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field |
michael@0 | 337 | mod = mod >> 3; // shift the mod field to the right place |
michael@0 | 338 | modrm = mod | modrm; // combine the r/m and mod fields as discussed |
michael@0 | 339 | mod = mod >> 3; // shift the mod field to bits 2..0 |
michael@0 | 340 | |
michael@0 | 341 | // Invariant: modrm contains the mod field in bits 4..3 and the r/m field |
michael@0 | 342 | // in bits 2..0, and mod contains the mod field in bits 2..0 |
michael@0 | 343 | |
michael@0 | 344 | const ModrmEntry* modrm_entry = 0; |
michael@0 | 345 | if (address_is_32_bits_) |
michael@0 | 346 | modrm_entry = &s_ia32_modrm_map_[modrm]; |
michael@0 | 347 | else |
michael@0 | 348 | modrm_entry = &s_ia16_modrm_map_[modrm]; |
michael@0 | 349 | |
michael@0 | 350 | // Invariant: modrm_entry points to information that we need to decode |
michael@0 | 351 | // the ModR/M byte. |
michael@0 | 352 | |
michael@0 | 353 | // Add to the count of operand bytes, if the ModR/M byte indicates |
michael@0 | 354 | // that some operands are encoded in the instruction. |
michael@0 | 355 | if (modrm_entry->is_encoded_in_instruction_) |
michael@0 | 356 | operand_bytes_ += modrm_entry->operand_size_; |
michael@0 | 357 | |
michael@0 | 358 | // Process the SIB byte if necessary, and return the count |
michael@0 | 359 | // of ModR/M and SIB bytes. |
michael@0 | 360 | if (modrm_entry->use_sib_byte_) { |
michael@0 | 361 | (*size)++; |
michael@0 | 362 | return ProcessSib(start_byte + 1, mod, size); |
michael@0 | 363 | } else { |
michael@0 | 364 | (*size)++; |
michael@0 | 365 | return true; |
michael@0 | 366 | } |
michael@0 | 367 | } |
michael@0 | 368 | |
michael@0 | 369 | bool MiniDisassembler::ProcessSib(unsigned char* start_byte, |
michael@0 | 370 | unsigned char mod, |
michael@0 | 371 | unsigned int* size) { |
michael@0 | 372 | // get the mod field from the 2..0 bits of the SIB byte |
michael@0 | 373 | unsigned char sib_base = (*start_byte) & 0x07; |
michael@0 | 374 | if (0x05 == sib_base) { |
michael@0 | 375 | switch (mod) { |
michael@0 | 376 | case 0x00: // mod == 00 |
michael@0 | 377 | case 0x02: // mod == 10 |
michael@0 | 378 | operand_bytes_ += OS_DOUBLE_WORD; |
michael@0 | 379 | break; |
michael@0 | 380 | case 0x01: // mod == 01 |
michael@0 | 381 | operand_bytes_ += OS_BYTE; |
michael@0 | 382 | break; |
michael@0 | 383 | case 0x03: // mod == 11 |
michael@0 | 384 | // According to the IA-32 docs, there does not seem to be a disp |
michael@0 | 385 | // value for this value of mod |
michael@0 | 386 | default: |
michael@0 | 387 | break; |
michael@0 | 388 | } |
michael@0 | 389 | } |
michael@0 | 390 | |
michael@0 | 391 | (*size)++; |
michael@0 | 392 | return true; |
michael@0 | 393 | } |
michael@0 | 394 | |
michael@0 | 395 | }; // namespace sidestep |