|
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Implementation of MiniDisassembler.
|
6 |
|
7 #ifdef _WIN64 |
|
8 #error The code in this file should not be used on 64-bit Windows. |
|
9 #endif |
|
10 |
|
11 #include "sandbox/win/src/sidestep/mini_disassembler.h" |
|
12 |
|
13 namespace sidestep { |
|
14 |
|
15 MiniDisassembler::MiniDisassembler(bool operand_default_is_32_bits, |
|
16 bool address_default_is_32_bits) |
|
17 : operand_default_is_32_bits_(operand_default_is_32_bits), |
|
18 address_default_is_32_bits_(address_default_is_32_bits) { |
|
19 Initialize(); |
|
20 } |
|
21 |
|
22 MiniDisassembler::MiniDisassembler() |
|
23 : operand_default_is_32_bits_(true), |
|
24 address_default_is_32_bits_(true) { |
|
25 Initialize(); |
|
26 } |
|
27 |
|
28 InstructionType MiniDisassembler::Disassemble( |
|
29 unsigned char* start_byte, |
|
30 unsigned int* instruction_bytes) { |
|
31 // Clean up any state from previous invocations. |
|
32 Initialize(); |
|
33 |
|
34 // Start by processing any prefixes. |
|
35 unsigned char* current_byte = start_byte; |
|
36 unsigned int size = 0; |
|
37 InstructionType instruction_type = ProcessPrefixes(current_byte, &size); |
|
38 |
|
39 if (IT_UNKNOWN == instruction_type) |
|
40 return instruction_type; |
|
41 |
|
42 current_byte += size; |
|
43 size = 0; |
|
44 |
|
45 // Invariant: We have stripped all prefixes, and the operand_is_32_bits_ |
|
46 // and address_is_32_bits_ flags are correctly set. |
|
47 |
|
48 instruction_type = ProcessOpcode(current_byte, 0, &size); |
|
49 |
|
50 // Check for error processing instruction |
|
51 if ((IT_UNKNOWN == instruction_type_) || (IT_UNUSED == instruction_type_)) { |
|
52 return IT_UNKNOWN; |
|
53 } |
|
54 |
|
55 current_byte += size; |
|
56 |
|
57 // Invariant: operand_bytes_ indicates the total size of operands |
|
58 // specified by the opcode and/or ModR/M byte and/or SIB byte. |
|
59 // pCurrentByte points to the first byte after the ModR/M byte, or after |
|
60 // the SIB byte if it is present (i.e. the first byte of any operands |
|
61 // encoded in the instruction). |
|
62 |
|
63 // We get the total length of any prefixes, the opcode, and the ModR/M and |
|
64 // SIB bytes if present, by taking the difference of the original starting |
|
65 // address and the current byte (which points to the first byte of the |
|
66 // operands if present, or to the first byte of the next instruction if |
|
67 // they are not). Adding the count of bytes in the operands encoded in |
|
68 // the instruction gives us the full length of the instruction in bytes. |
|
69 *instruction_bytes += operand_bytes_ + (current_byte - start_byte); |
|
70 |
|
71 // Return the instruction type, which was set by ProcessOpcode(). |
|
72 return instruction_type_; |
|
73 } |
|
74 |
|
75 void MiniDisassembler::Initialize() { |
|
76 operand_is_32_bits_ = operand_default_is_32_bits_; |
|
77 address_is_32_bits_ = address_default_is_32_bits_; |
|
78 operand_bytes_ = 0; |
|
79 have_modrm_ = false; |
|
80 should_decode_modrm_ = false; |
|
81 instruction_type_ = IT_UNKNOWN; |
|
82 got_f2_prefix_ = false; |
|
83 got_f3_prefix_ = false; |
|
84 got_66_prefix_ = false; |
|
85 } |
|
86 |
|
87 InstructionType MiniDisassembler::ProcessPrefixes(unsigned char* start_byte, |
|
88 unsigned int* size) { |
|
89 InstructionType instruction_type = IT_GENERIC; |
|
90 const Opcode& opcode = s_ia32_opcode_map_[0].table_[*start_byte]; |
|
91 |
|
92 switch (opcode.type_) { |
|
93 case IT_PREFIX_ADDRESS: |
|
94 address_is_32_bits_ = !address_default_is_32_bits_; |
|
95 goto nochangeoperand; |
|
96 case IT_PREFIX_OPERAND: |
|
97 operand_is_32_bits_ = !operand_default_is_32_bits_; |
|
98 nochangeoperand: |
|
99 case IT_PREFIX: |
|
100 |
|
101 if (0xF2 == (*start_byte)) |
|
102 got_f2_prefix_ = true; |
|
103 else if (0xF3 == (*start_byte)) |
|
104 got_f3_prefix_ = true; |
|
105 else if (0x66 == (*start_byte)) |
|
106 got_66_prefix_ = true; |
|
107 |
|
108 instruction_type = opcode.type_; |
|
109 (*size)++; |
|
110 // we got a prefix, so add one and check next byte |
|
111 ProcessPrefixes(start_byte + 1, size); |
|
112 default: |
|
113 break; // not a prefix byte |
|
114 } |
|
115 |
|
116 return instruction_type; |
|
117 } |
|
118 |
|
119 InstructionType MiniDisassembler::ProcessOpcode(unsigned char* start_byte, |
|
120 unsigned int table_index, |
|
121 unsigned int* size) { |
|
122 const OpcodeTable& table = s_ia32_opcode_map_[table_index]; // Get our table |
|
123 unsigned char current_byte = (*start_byte) >> table.shift_; |
|
124 current_byte = current_byte & table.mask_; // Mask out the bits we will use |
|
125 |
|
126 // Check whether the byte we have is inside the table we have. |
|
127 if (current_byte < table.min_lim_ || current_byte > table.max_lim_) { |
|
128 instruction_type_ = IT_UNKNOWN; |
|
129 return instruction_type_; |
|
130 } |
|
131 |
|
132 const Opcode& opcode = table.table_[current_byte]; |
|
133 if (IT_UNUSED == opcode.type_) { |
|
134 // This instruction is not used by the IA-32 ISA, so we indicate |
|
135 // this to the user. Probably means that we were pointed to |
|
136 // a byte in memory that was not the start of an instruction. |
|
137 instruction_type_ = IT_UNUSED; |
|
138 return instruction_type_; |
|
139 } else if (IT_REFERENCE == opcode.type_) { |
|
140 // We are looking at an opcode that has more bytes (or is continued |
|
141 // in the ModR/M byte). Recursively find the opcode definition in |
|
142 // the table for the opcode's next byte. |
|
143 (*size)++; |
|
144 ProcessOpcode(start_byte + 1, opcode.table_index_, size); |
|
145 return instruction_type_; |
|
146 } |
|
147 |
|
148 const SpecificOpcode* specific_opcode = reinterpret_cast< |
|
149 const SpecificOpcode*>(&opcode); |
|
150 if (opcode.is_prefix_dependent_) { |
|
151 if (got_f2_prefix_ && opcode.opcode_if_f2_prefix_.mnemonic_ != 0) { |
|
152 specific_opcode = &opcode.opcode_if_f2_prefix_; |
|
153 } else if (got_f3_prefix_ && opcode.opcode_if_f3_prefix_.mnemonic_ != 0) { |
|
154 specific_opcode = &opcode.opcode_if_f3_prefix_; |
|
155 } else if (got_66_prefix_ && opcode.opcode_if_66_prefix_.mnemonic_ != 0) { |
|
156 specific_opcode = &opcode.opcode_if_66_prefix_; |
|
157 } |
|
158 } |
|
159 |
|
160 // Inv: The opcode type is known. |
|
161 instruction_type_ = specific_opcode->type_; |
|
162 |
|
163 // Let's process the operand types to see if we have any immediate |
|
164 // operands, and/or a ModR/M byte. |
|
165 |
|
166 ProcessOperand(specific_opcode->flag_dest_); |
|
167 ProcessOperand(specific_opcode->flag_source_); |
|
168 ProcessOperand(specific_opcode->flag_aux_); |
|
169 |
|
170 // Inv: We have processed the opcode and incremented operand_bytes_ |
|
171 // by the number of bytes of any operands specified by the opcode |
|
172 // that are stored in the instruction (not registers etc.). Now |
|
173 // we need to return the total number of bytes for the opcode and |
|
174 // for the ModR/M or SIB bytes if they are present. |
|
175 |
|
176 if (table.mask_ != 0xff) { |
|
177 if (have_modrm_) { |
|
178 // we're looking at a ModR/M byte so we're not going to |
|
179 // count that into the opcode size |
|
180 ProcessModrm(start_byte, size); |
|
181 return IT_GENERIC; |
|
182 } else { |
|
183 // need to count the ModR/M byte even if it's just being |
|
184 // used for opcode extension |
|
185 (*size)++; |
|
186 return IT_GENERIC; |
|
187 } |
|
188 } else { |
|
189 if (have_modrm_) { |
|
190 // The ModR/M byte is the next byte. |
|
191 (*size)++; |
|
192 ProcessModrm(start_byte + 1, size); |
|
193 return IT_GENERIC; |
|
194 } else { |
|
195 (*size)++; |
|
196 return IT_GENERIC; |
|
197 } |
|
198 } |
|
199 } |
|
200 |
|
201 bool MiniDisassembler::ProcessOperand(int flag_operand) { |
|
202 bool succeeded = true; |
|
203 if (AM_NOT_USED == flag_operand) |
|
204 return succeeded; |
|
205 |
|
206 // Decide what to do based on the addressing mode. |
|
207 switch (flag_operand & AM_MASK) { |
|
208 // No ModR/M byte indicated by these addressing modes, and no |
|
209 // additional (e.g. immediate) parameters. |
|
210 case AM_A: // Direct address |
|
211 case AM_F: // EFLAGS register |
|
212 case AM_X: // Memory addressed by the DS:SI register pair |
|
213 case AM_Y: // Memory addressed by the ES:DI register pair |
|
214 case AM_IMPLICIT: // Parameter is implicit, occupies no space in |
|
215 // instruction |
|
216 break; |
|
217 |
|
218 // There is a ModR/M byte but it does not necessarily need |
|
219 // to be decoded. |
|
220 case AM_C: // reg field of ModR/M selects a control register |
|
221 case AM_D: // reg field of ModR/M selects a debug register |
|
222 case AM_G: // reg field of ModR/M selects a general register |
|
223 case AM_P: // reg field of ModR/M selects an MMX register |
|
224 case AM_R: // mod field of ModR/M may refer only to a general register |
|
225 case AM_S: // reg field of ModR/M selects a segment register |
|
226 case AM_T: // reg field of ModR/M selects a test register |
|
227 case AM_V: // reg field of ModR/M selects a 128-bit XMM register |
|
228 have_modrm_ = true; |
|
229 break; |
|
230 |
|
231 // In these addressing modes, there is a ModR/M byte and it needs to be |
|
232 // decoded. No other (e.g. immediate) params than indicated in ModR/M. |
|
233 case AM_E: // Operand is either a general-purpose register or memory, |
|
234 // specified by ModR/M byte |
|
235 case AM_M: // ModR/M byte will refer only to memory |
|
236 case AM_Q: // Operand is either an MMX register or memory (complex |
|
237 // evaluation), specified by ModR/M byte |
|
238 case AM_W: // Operand is either a 128-bit XMM register or memory (complex |
|
239 // eval), specified by ModR/M byte |
|
240 have_modrm_ = true; |
|
241 should_decode_modrm_ = true; |
|
242 break; |
|
243 |
|
244 // These addressing modes specify an immediate or an offset value |
|
245 // directly, so we need to look at the operand type to see how many |
|
246 // bytes. |
|
247 case AM_I: // Immediate data. |
|
248 case AM_J: // Jump to offset. |
|
249 case AM_O: // Operand is at offset. |
|
250 switch (flag_operand & OT_MASK) { |
|
251 case OT_B: // Byte regardless of operand-size attribute. |
|
252 operand_bytes_ += OS_BYTE; |
|
253 break; |
|
254 case OT_C: // Byte or word, depending on operand-size attribute. |
|
255 if (operand_is_32_bits_) |
|
256 operand_bytes_ += OS_WORD; |
|
257 else |
|
258 operand_bytes_ += OS_BYTE; |
|
259 break; |
|
260 case OT_D: // Doubleword, regardless of operand-size attribute. |
|
261 operand_bytes_ += OS_DOUBLE_WORD; |
|
262 break; |
|
263 case OT_DQ: // Double-quadword, regardless of operand-size attribute. |
|
264 operand_bytes_ += OS_DOUBLE_QUAD_WORD; |
|
265 break; |
|
266 case OT_P: // 32-bit or 48-bit pointer, depending on operand-size |
|
267 // attribute. |
|
268 if (operand_is_32_bits_) |
|
269 operand_bytes_ += OS_48_BIT_POINTER; |
|
270 else |
|
271 operand_bytes_ += OS_32_BIT_POINTER; |
|
272 break; |
|
273 case OT_PS: // 128-bit packed single-precision floating-point data. |
|
274 operand_bytes_ += OS_128_BIT_PACKED_SINGLE_PRECISION_FLOATING; |
|
275 break; |
|
276 case OT_Q: // Quadword, regardless of operand-size attribute. |
|
277 operand_bytes_ += OS_QUAD_WORD; |
|
278 break; |
|
279 case OT_S: // 6-byte pseudo-descriptor. |
|
280 operand_bytes_ += OS_PSEUDO_DESCRIPTOR; |
|
281 break; |
|
282 case OT_SD: // Scalar Double-Precision Floating-Point Value |
|
283 case OT_PD: // Unaligned packed double-precision floating point value |
|
284 operand_bytes_ += OS_DOUBLE_PRECISION_FLOATING; |
|
285 break; |
|
286 case OT_SS: |
|
287 // Scalar element of a 128-bit packed single-precision |
|
288 // floating data. |
|
289 // We simply return enItUnknown since we don't have to support |
|
290 // floating point |
|
291 succeeded = false; |
|
292 break; |
|
293 case OT_V: // Word or doubleword, depending on operand-size attribute. |
|
294 if (operand_is_32_bits_) |
|
295 operand_bytes_ += OS_DOUBLE_WORD; |
|
296 else |
|
297 operand_bytes_ += OS_WORD; |
|
298 break; |
|
299 case OT_W: // Word, regardless of operand-size attribute. |
|
300 operand_bytes_ += OS_WORD; |
|
301 break; |
|
302 |
|
303 // Can safely ignore these. |
|
304 case OT_A: // Two one-word operands in memory or two double-word |
|
305 // operands in memory |
|
306 case OT_PI: // Quadword MMX technology register (e.g. mm0) |
|
307 case OT_SI: // Doubleword integer register (e.g., eax) |
|
308 break; |
|
309 |
|
310 default: |
|
311 break; |
|
312 } |
|
313 break; |
|
314 |
|
315 default: |
|
316 break; |
|
317 } |
|
318 |
|
319 return succeeded; |
|
320 } |
|
321 |
|
322 bool MiniDisassembler::ProcessModrm(unsigned char* start_byte, |
|
323 unsigned int* size) { |
|
324 // If we don't need to decode, we just return the size of the ModR/M |
|
325 // byte (there is never a SIB byte in this case). |
|
326 if (!should_decode_modrm_) { |
|
327 (*size)++; |
|
328 return true; |
|
329 } |
|
330 |
|
331 // We never care about the reg field, only the combination of the mod |
|
332 // and r/m fields, so let's start by packing those fields together into |
|
333 // 5 bits. |
|
334 unsigned char modrm = (*start_byte); |
|
335 unsigned char mod = modrm & 0xC0; // mask out top two bits to get mod field |
|
336 modrm = modrm & 0x07; // mask out bottom 3 bits to get r/m field |
|
337 mod = mod >> 3; // shift the mod field to the right place |
|
338 modrm = mod | modrm; // combine the r/m and mod fields as discussed |
|
339 mod = mod >> 3; // shift the mod field to bits 2..0 |
|
340 |
|
341 // Invariant: modrm contains the mod field in bits 4..3 and the r/m field |
|
342 // in bits 2..0, and mod contains the mod field in bits 2..0 |
|
343 |
|
344 const ModrmEntry* modrm_entry = 0; |
|
345 if (address_is_32_bits_) |
|
346 modrm_entry = &s_ia32_modrm_map_[modrm]; |
|
347 else |
|
348 modrm_entry = &s_ia16_modrm_map_[modrm]; |
|
349 |
|
350 // Invariant: modrm_entry points to information that we need to decode |
|
351 // the ModR/M byte. |
|
352 |
|
353 // Add to the count of operand bytes, if the ModR/M byte indicates |
|
354 // that some operands are encoded in the instruction. |
|
355 if (modrm_entry->is_encoded_in_instruction_) |
|
356 operand_bytes_ += modrm_entry->operand_size_; |
|
357 |
|
358 // Process the SIB byte if necessary, and return the count |
|
359 // of ModR/M and SIB bytes. |
|
360 if (modrm_entry->use_sib_byte_) { |
|
361 (*size)++; |
|
362 return ProcessSib(start_byte + 1, mod, size); |
|
363 } else { |
|
364 (*size)++; |
|
365 return true; |
|
366 } |
|
367 } |
|
368 |
|
369 bool MiniDisassembler::ProcessSib(unsigned char* start_byte, |
|
370 unsigned char mod, |
|
371 unsigned int* size) { |
|
372 // get the mod field from the 2..0 bits of the SIB byte |
|
373 unsigned char sib_base = (*start_byte) & 0x07; |
|
374 if (0x05 == sib_base) { |
|
375 switch (mod) { |
|
376 case 0x00: // mod == 00 |
|
377 case 0x02: // mod == 10 |
|
378 operand_bytes_ += OS_DOUBLE_WORD; |
|
379 break; |
|
380 case 0x01: // mod == 01 |
|
381 operand_bytes_ += OS_BYTE; |
|
382 break; |
|
383 case 0x03: // mod == 11 |
|
384 // According to the IA-32 docs, there does not seem to be a disp |
|
385 // value for this value of mod |
|
386 default: |
|
387 break; |
|
388 } |
|
389 } |
|
390 |
|
391 (*size)++; |
|
392 return true; |
|
393 } |
|
394 |
|
395 }; // namespace sidestep |