security/sandbox/win/src/sidestep/mini_disassembler.cpp

Wed, 31 Dec 2014 06:09:35 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Wed, 31 Dec 2014 06:09:35 +0100
changeset 0
6474c204b198
permissions
-rw-r--r--

Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.

     1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
     2 // Use of this source code is governed by a BSD-style license that can be
     3 // found in the LICENSE file.
     5 // Implementation of MiniDisassembler.
     7 #ifdef _WIN64
     8 #error The code in this file should not be used on 64-bit Windows.
     9 #endif
    11 #include "sandbox/win/src/sidestep/mini_disassembler.h"
    13 namespace sidestep {
    15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits,
    16                                    bool address_default_is_32_bits)
    17     : operand_default_is_32_bits_(operand_default_is_32_bits),
    18       address_default_is_32_bits_(address_default_is_32_bits) {
    19   Initialize();
    20 }
    22 MiniDisassembler::MiniDisassembler()
    23     : operand_default_is_32_bits_(true),
    24       address_default_is_32_bits_(true) {
    25   Initialize();
    26 }
    28 InstructionType MiniDisassembler::Disassemble(
    29     unsigned char* start_byte,
    30     unsigned int* instruction_bytes) {
    31   // Clean up any state from previous invocations.
    32   Initialize();
    34   // Start by processing any prefixes.
    35   unsigned char* current_byte = start_byte;
    36   unsigned int size = 0;
    37   InstructionType instruction_type = ProcessPrefixes(current_byte, &size);
    39   if (IT_UNKNOWN == instruction_type)
    40     return instruction_type;
    42   current_byte += size;
    43   size = 0;
    45   // Invariant: We have stripped all prefixes, and the operand_is_32_bits_
    46   // and address_is_32_bits_ flags are correctly set.
    48   instruction_type = ProcessOpcode(current_byte, 0, &size);
    50   // Check for error processing instruction
    51   if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) {
    52     return IT_UNKNOWN;
    53   }
    55   current_byte += size;
    57   // Invariant: operand_bytes_ indicates the total size of operands
    58   // specified by the opcode and/or ModR/M byte and/or SIB byte.
    59   // pCurrentByte points to the first byte after the ModR/M byte, or after
    60   // the SIB byte if it is present (i.e. the first byte of any operands
    61   // encoded in the instruction).
    63   // We get the total length of any prefixes, the opcode, and the ModR/M and
    64   // SIB bytes if present, by taking the difference of the original starting
    65   // address and the current byte (which points to the first byte of the
    66   // operands if present, or to the first byte of the next instruction if
    67   // they are not).  Adding the count of bytes in the operands encoded in
    68   // the instruction gives us the full length of the instruction in bytes.
    69   *instruction_bytes += operand_bytes_ + (current_byte - start_byte);
    71   // Return the instruction type, which was set by ProcessOpcode().
    72   return instruction_type_;
    73 }
    75 void MiniDisassembler::Initialize() {
    76   operand_is_32_bits_ = operand_default_is_32_bits_;
    77   address_is_32_bits_ = address_default_is_32_bits_;
    78   operand_bytes_ = 0;
    79   have_modrm_ = false;
    80   should_decode_modrm_ = false;
    81   instruction_type_ = IT_UNKNOWN;
    82   got_f2_prefix_ = false;
    83   got_f3_prefix_ = false;
    84   got_66_prefix_ = false;
    85 }
    87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte,
    88                                                   unsigned int* size) {
    89   InstructionType instruction_type = IT_GENERIC;
    90   const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte];
    92   switch (opcode.type_) {
    93     case IT_PREFIX_ADDRESS:
    94       address_is_32_bits_ = !address_default_is_32_bits_;
    95       goto nochangeoperand;
    96     case IT_PREFIX_OPERAND:
    97       operand_is_32_bits_ = !operand_default_is_32_bits_;
    98       nochangeoperand:
    99     case IT_PREFIX:
   101       if (0xF2 == (*start_byte))
   102         got_f2_prefix_ = true;
   103       else if (0xF3 == (*start_byte))
   104         got_f3_prefix_ = true;
   105       else if (0x66 == (*start_byte))
   106         got_66_prefix_ = true;
   108       instruction_type = opcode.type_;
   109       (*size)++;
   110       // we got a prefix, so add one and check next byte
   111       ProcessPrefixes(start_byte + 1, size);
   112     default:
   113       break;   // not a prefix byte
   114   }
   116   return instruction_type;
   117 }
   119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte,
   120                                                 unsigned int table_index,
   121                                                 unsigned int* size) {
   122   const OpcodeTable& table = s_ia32_opcode_map_[table_index];   // Get our table
   123   unsigned char current_byte = (*start_byte) >> table.shift_;
   124   current_byte = current_byte & table.mask_;  // Mask out the bits we will use
   126   // Check whether the byte we have is inside the table we have.
   127   if (current_byte < table.min_lim_ || current_byte > table.max_lim_) {
   128     instruction_type_ = IT_UNKNOWN;
   129     return instruction_type_;
   130   }
   132   const Opcode& opcode = table.table_[current_byte];
   133   if (IT_UNUSED == opcode.type_) {
   134     // This instruction is not used by the IA-32 ISA, so we indicate
   135     // this to the user.  Probably means that we were pointed to
   136     // a byte in memory that was not the start of an instruction.
   137     instruction_type_ = IT_UNUSED;
   138     return instruction_type_;
   139   } else if (IT_REFERENCE == opcode.type_) {
   140     // We are looking at an opcode that has more bytes (or is continued
   141     // in the ModR/M byte).  Recursively find the opcode definition in
   142     // the table for the opcode's next byte.
   143     (*size)++;
   144     ProcessOpcode(start_byte + 1, opcode.table_index_, size);
   145     return instruction_type_;
   146   }
   148   const SpecificOpcode* specific_opcode = reinterpret_cast<
   149                                               const SpecificOpcode*>(&opcode);
   150   if (opcode.is_prefix_dependent_) {
   151     if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) {
   152       specific_opcode = &opcode.opcode_if_f2_prefix_;
   153     } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) {
   154       specific_opcode = &opcode.opcode_if_f3_prefix_;
   155     } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) {
   156       specific_opcode = &opcode.opcode_if_66_prefix_;
   157     }
   158   }
   160   // Inv: The opcode type is known.
   161   instruction_type_ = specific_opcode->type_;
   163   // Let's process the operand types to see if we have any immediate
   164   // operands, and/or a ModR/M byte.
   166   ProcessOperand(specific_opcode->flag_dest_);
   167   ProcessOperand(specific_opcode->flag_source_);
   168   ProcessOperand(specific_opcode->flag_aux_);
   170   // Inv: We have processed the opcode and incremented operand_bytes_
   171   // by the number of bytes of any operands specified by the opcode
   172   // that are stored in the instruction (not registers etc.).  Now
   173   // we need to return the total number of bytes for the opcode and
   174   // for the ModR/M or SIB bytes if they are present.
   176   if (table.mask_ != 0xff) {
   177     if (have_modrm_) {
   178       // we're looking at a ModR/M byte so we're not going to
   179       // count that into the opcode size
   180       ProcessModrm(start_byte, size);
   181       return IT_GENERIC;
   182     } else {
   183       // need to count the ModR/M byte even if it's just being
   184       // used for opcode extension
   185       (*size)++;
   186       return IT_GENERIC;
   187     }
   188   } else {
   189     if (have_modrm_) {
   190       // The ModR/M byte is the next byte.
   191       (*size)++;
   192       ProcessModrm(start_byte + 1, size);
   193       return IT_GENERIC;
   194     } else {
   195       (*size)++;
   196       return IT_GENERIC;
   197     }
   198   }
   199 }
   201 bool MiniDisassembler::ProcessOperand(int flag_operand) {
   202   bool succeeded = true;
   203   if (AM_NOT_USED == flag_operand)
   204     return succeeded;
   206   // Decide what to do based on the addressing mode.
   207   switch (flag_operand & AM_MASK) {
   208     // No ModR/M byte indicated by these addressing modes, and no
   209     // additional (e.g. immediate) parameters.
   210     case AM_A:  // Direct address
   211     case AM_F:  // EFLAGS register
   212     case AM_X:  // Memory addressed by the DS:SI register pair
   213     case AM_Y:  // Memory addressed by the ES:DI register pair
   214     case AM_IMPLICIT:  // Parameter is implicit, occupies no space in
   215                        // instruction
   216       break;
   218     // There is a ModR/M byte but it does not necessarily need
   219     // to be decoded.
   220     case AM_C:  // reg field of ModR/M selects a control register
   221     case AM_D:  // reg field of ModR/M selects a debug register
   222     case AM_G:  // reg field of ModR/M selects a general register
   223     case AM_P:  // reg field of ModR/M selects an MMX register
   224     case AM_R:  // mod field of ModR/M may refer only to a general register
   225     case AM_S:  // reg field of ModR/M selects a segment register
   226     case AM_T:  // reg field of ModR/M selects a test register
   227     case AM_V:  // reg field of ModR/M selects a 128-bit XMM register
   228       have_modrm_ = true;
   229       break;
   231     // In these addressing modes, there is a ModR/M byte and it needs to be
   232     // decoded. No other (e.g. immediate) params than indicated in ModR/M.
   233     case AM_E:  // Operand is either a general-purpose register or memory,
   234                 // specified by ModR/M byte
   235     case AM_M:  // ModR/M byte will refer only to memory
   236     case AM_Q:  // Operand is either an MMX register or memory (complex
   237                 // evaluation), specified by ModR/M byte
   238     case AM_W:  // Operand is either a 128-bit XMM register or memory (complex
   239                 // eval), specified by ModR/M byte
   240       have_modrm_ = true;
   241       should_decode_modrm_ = true;
   242       break;
   244     // These addressing modes specify an immediate or an offset value
   245     // directly, so we need to look at the operand type to see how many
   246     // bytes.
   247     case AM_I:  // Immediate data.
   248     case AM_J:  // Jump to offset.
   249     case AM_O:  // Operand is at offset.
   250       switch (flag_operand & OT_MASK) {
   251         case OT_B:  // Byte regardless of operand-size attribute.
   252           operand_bytes_ += OS_BYTE;
   253           break;
   254         case OT_C:  // Byte or word, depending on operand-size attribute.
   255           if (operand_is_32_bits_)
   256             operand_bytes_ += OS_WORD;
   257           else
   258             operand_bytes_ += OS_BYTE;
   259           break;
   260         case OT_D:  // Doubleword, regardless of operand-size attribute.
   261           operand_bytes_ += OS_DOUBLE_WORD;
   262           break;
   263         case OT_DQ:  // Double-quadword, regardless of operand-size attribute.
   264           operand_bytes_ += OS_DOUBLE_QUAD_WORD;
   265           break;
   266         case OT_P:  // 32-bit or 48-bit pointer, depending on operand-size
   267                     // attribute.
   268           if (operand_is_32_bits_)
   269             operand_bytes_ += OS_48_BIT_POINTER;
   270           else
   271             operand_bytes_ += OS_32_BIT_POINTER;
   272           break;
   273         case OT_PS:  // 128-bit packed single-precision floating-point data.
   274           operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING;
   275           break;
   276         case OT_Q:  // Quadword, regardless of operand-size attribute.
   277           operand_bytes_ += OS_QUAD_WORD;
   278           break;
   279         case OT_S:  // 6-byte pseudo-descriptor.
   280           operand_bytes_ += OS_PSEUDO_DESCRIPTOR;
   281           break;
   282         case OT_SD:  // Scalar Double-Precision Floating-Point Value
   283         case OT_PD:  // Unaligned packed double-precision floating point value
   284           operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING;
   285           break;
   286         case OT_SS:
   287           // Scalar element of a 128-bit packed single-precision
   288           // floating data.
   289           // We simply return enItUnknown since we don't have to support
   290           // floating point
   291           succeeded = false;
   292           break;
   293         case OT_V:  // Word or doubleword, depending on operand-size attribute.
   294           if (operand_is_32_bits_)
   295             operand_bytes_ += OS_DOUBLE_WORD;
   296           else
   297             operand_bytes_ += OS_WORD;
   298           break;
   299         case OT_W:  // Word, regardless of operand-size attribute.
   300           operand_bytes_ += OS_WORD;
   301           break;
   303         // Can safely ignore these.
   304         case OT_A:  // Two one-word operands in memory or two double-word
   305                     // operands in memory
   306         case OT_PI:  // Quadword MMX technology register (e.g. mm0)
   307         case OT_SI:  // Doubleword integer register (e.g., eax)
   308           break;
   310         default:
   311           break;
   312       }
   313       break;
   315     default:
   316       break;
   317   }
   319   return succeeded;
   320 }
   322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte,
   323                                     unsigned int* size) {
   324   // If we don't need to decode, we just return the size of the ModR/M
   325   // byte (there is never a SIB byte in this case).
   326   if (!should_decode_modrm_) {
   327     (*size)++;
   328     return true;
   329   }
   331   // We never care about the reg field, only the combination of the mod
   332   // and r/m fields, so let's start by packing those fields together into
   333   // 5 bits.
   334   unsigned char modrm = (*start_byte);
   335   unsigned char mod = modrm & 0xC0;  // mask out top two bits to get mod field
   336   modrm = modrm & 0x07;  // mask out bottom 3 bits to get r/m field
   337   mod = mod >> 3;  // shift the mod field to the right place
   338   modrm = mod | modrm;  // combine the r/m and mod fields as discussed
   339   mod = mod >> 3;  // shift the mod field to bits 2..0
   341   // Invariant: modrm contains the mod field in bits 4..3 and the r/m field
   342   // in bits 2..0, and mod contains the mod field in bits 2..0
   344   const ModrmEntry* modrm_entry = 0;
   345   if (address_is_32_bits_)
   346     modrm_entry = &s_ia32_modrm_map_[modrm];
   347   else
   348     modrm_entry = &s_ia16_modrm_map_[modrm];
   350   // Invariant: modrm_entry points to information that we need to decode
   351   // the ModR/M byte.
   353   // Add to the count of operand bytes, if the ModR/M byte indicates
   354   // that some operands are encoded in the instruction.
   355   if (modrm_entry->is_encoded_in_instruction_)
   356     operand_bytes_ += modrm_entry->operand_size_;
   358   // Process the SIB byte if necessary, and return the count
   359   // of ModR/M and SIB bytes.
   360   if (modrm_entry->use_sib_byte_) {
   361     (*size)++;
   362     return ProcessSib(start_byte + 1, mod, size);
   363   } else {
   364     (*size)++;
   365     return true;
   366   }
   367 }
   369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte,
   370                                   unsigned char mod,
   371                                   unsigned int* size) {
   372   // get the mod field from the 2..0 bits of the SIB byte
   373   unsigned char sib_base = (*start_byte) & 0x07;
   374   if (0x05 == sib_base) {
   375     switch (mod) {
   376       case 0x00:  // mod == 00
   377       case 0x02:  // mod == 10
   378         operand_bytes_ += OS_DOUBLE_WORD;
   379         break;
   380       case 0x01:  // mod == 01
   381         operand_bytes_ += OS_BYTE;
   382         break;
   383       case 0x03:  // mod == 11
   384         // According to the IA-32 docs, there does not seem to be a disp
   385         // value for this value of mod
   386       default:
   387         break;
   388     }
   389   }
   391   (*size)++;
   392   return true;
   393 }
   395 };  // namespace sidestep

mercurial