|
1 #include <stdio.h> |
|
2 #include <stdlib.h> |
|
3 #include <string.h> |
|
4 |
|
5 #include "libdis.h" |
|
6 #include "ia32_insn.h" |
|
7 #include "ia32_operand.h" |
|
8 #include "ia32_modrm.h" |
|
9 #include "ia32_reg.h" |
|
10 #include "x86_imm.h" |
|
11 #include "x86_operand_list.h" |
|
12 |
|
13 |
|
14 |
|
15 /* apply segment override to memory operand in insn */ |
|
16 static void apply_seg( x86_op_t *op, unsigned int prefixes ) { |
|
17 if (! prefixes ) return; |
|
18 |
|
19 /* apply overrides from prefix */ |
|
20 switch ( prefixes & PREFIX_REG_MASK ) { |
|
21 case PREFIX_CS: |
|
22 op->flags |= op_cs_seg; break; |
|
23 case PREFIX_SS: |
|
24 op->flags |= op_ss_seg; break; |
|
25 case PREFIX_DS: |
|
26 op->flags |= op_ds_seg; break; |
|
27 case PREFIX_ES: |
|
28 op->flags |= op_es_seg; break; |
|
29 case PREFIX_FS: |
|
30 op->flags |= op_fs_seg; break; |
|
31 case PREFIX_GS: |
|
32 op->flags |= op_gs_seg; break; |
|
33 } |
|
34 |
|
35 return; |
|
36 } |
|
37 |
|
38 static size_t decode_operand_value( unsigned char *buf, size_t buf_len, |
|
39 x86_op_t *op, x86_insn_t *insn, |
|
40 unsigned int addr_meth, size_t op_size, |
|
41 unsigned int op_value, unsigned char modrm, |
|
42 size_t gen_regs ) { |
|
43 size_t size = 0; |
|
44 |
|
45 /* ++ Do Operand Addressing Method / Decode operand ++ */ |
|
46 switch (addr_meth) { |
|
47 /* This sets the operand Size based on the Intel Opcode Map |
|
48 * (Vol 2, Appendix A). Letter encodings are from section |
|
49 * A.1.1, 'Codes for Addressing Method' */ |
|
50 |
|
51 /* ---------------------- Addressing Method -------------- */ |
|
52 /* Note that decoding mod ModR/M operand adjusts the size of |
|
53 * the instruction, but decoding the reg operand does not. |
|
54 * This should not cause any problems, as every 'reg' operand |
|
55 * has an associated 'mod' operand. |
|
56 * Goddamn-Intel-Note: |
|
57 * Some Intel addressing methods [M, R] specify that modR/M |
|
58 * byte may only refer to a memory address/may only refer to |
|
59 * a register -- however Intel provides no clues on what to do |
|
60 * if, say, the modR/M for an M opcode decodes to a register |
|
61 * rather than a memory address ... returning 0 is out of the |
|
62 * question, as this would be an Immediate or a RelOffset, so |
|
63 * instead these modR/Ms are decoded with total disregard to |
|
64 * the M, R constraints. */ |
|
65 |
|
66 /* MODRM -- mod operand. sets size to at least 1! */ |
|
67 case ADDRMETH_E: /* ModR/M present, Gen reg or memory */ |
|
68 size = ia32_modrm_decode( buf, buf_len, op, insn, |
|
69 gen_regs ); |
|
70 break; |
|
71 case ADDRMETH_M: /* ModR/M only refers to memory */ |
|
72 size = ia32_modrm_decode( buf, buf_len, op, insn, |
|
73 gen_regs ); |
|
74 break; |
|
75 case ADDRMETH_Q: /* ModR/M present, MMX or Memory */ |
|
76 size = ia32_modrm_decode( buf, buf_len, op, insn, |
|
77 REG_MMX_OFFSET ); |
|
78 break; |
|
79 case ADDRMETH_R: /* ModR/M mod == gen reg */ |
|
80 size = ia32_modrm_decode( buf, buf_len, op, insn, |
|
81 gen_regs ); |
|
82 break; |
|
83 case ADDRMETH_W: /* ModR/M present, mem or SIMD reg */ |
|
84 size = ia32_modrm_decode( buf, buf_len, op, insn, |
|
85 REG_SIMD_OFFSET ); |
|
86 break; |
|
87 |
|
88 /* MODRM -- reg operand. does not effect size! */ |
|
89 case ADDRMETH_C: /* ModR/M reg == control reg */ |
|
90 ia32_reg_decode( modrm, op, REG_CTRL_OFFSET ); |
|
91 break; |
|
92 case ADDRMETH_D: /* ModR/M reg == debug reg */ |
|
93 ia32_reg_decode( modrm, op, REG_DEBUG_OFFSET ); |
|
94 break; |
|
95 case ADDRMETH_G: /* ModR/M reg == gen-purpose reg */ |
|
96 ia32_reg_decode( modrm, op, gen_regs ); |
|
97 break; |
|
98 case ADDRMETH_P: /* ModR/M reg == qword MMX reg */ |
|
99 ia32_reg_decode( modrm, op, REG_MMX_OFFSET ); |
|
100 break; |
|
101 case ADDRMETH_S: /* ModR/M reg == segment reg */ |
|
102 ia32_reg_decode( modrm, op, REG_SEG_OFFSET ); |
|
103 break; |
|
104 case ADDRMETH_T: /* ModR/M reg == test reg */ |
|
105 ia32_reg_decode( modrm, op, REG_TEST_OFFSET ); |
|
106 break; |
|
107 case ADDRMETH_V: /* ModR/M reg == SIMD reg */ |
|
108 ia32_reg_decode( modrm, op, REG_SIMD_OFFSET ); |
|
109 break; |
|
110 |
|
111 /* No MODRM : note these set operand type explicitly */ |
|
112 case ADDRMETH_A: /* No modR/M -- direct addr */ |
|
113 op->type = op_absolute; |
|
114 |
|
115 /* segment:offset address used in far calls */ |
|
116 x86_imm_sized( buf, buf_len, |
|
117 &op->data.absolute.segment, 2 ); |
|
118 if ( insn->addr_size == 4 ) { |
|
119 x86_imm_sized( buf, buf_len, |
|
120 &op->data.absolute.offset.off32, 4 ); |
|
121 size = 6; |
|
122 } else { |
|
123 x86_imm_sized( buf, buf_len, |
|
124 &op->data.absolute.offset.off16, 2 ); |
|
125 size = 4; |
|
126 } |
|
127 |
|
128 break; |
|
129 case ADDRMETH_I: /* Immediate val */ |
|
130 op->type = op_immediate; |
|
131 /* if it ever becomes legal to have imm as dest and |
|
132 * there is a src ModR/M operand, we are screwed! */ |
|
133 if ( op->flags & op_signed ) { |
|
134 x86_imm_signsized(buf, buf_len, &op->data.byte, |
|
135 op_size); |
|
136 } else { |
|
137 x86_imm_sized(buf, buf_len, &op->data.byte, |
|
138 op_size); |
|
139 } |
|
140 size = op_size; |
|
141 break; |
|
142 case ADDRMETH_J: /* Rel offset to add to IP [jmp] */ |
|
143 /* this fills op->data.near_offset or |
|
144 op->data.far_offset depending on the size of |
|
145 the operand */ |
|
146 op->flags |= op_signed; |
|
147 if ( op_size == 1 ) { |
|
148 /* one-byte near offset */ |
|
149 op->type = op_relative_near; |
|
150 x86_imm_signsized(buf, buf_len, |
|
151 &op->data.relative_near, 1); |
|
152 } else { |
|
153 /* far offset...is this truly signed? */ |
|
154 op->type = op_relative_far; |
|
155 x86_imm_signsized(buf, buf_len, |
|
156 &op->data.relative_far, op_size ); |
|
157 } |
|
158 size = op_size; |
|
159 break; |
|
160 case ADDRMETH_O: /* No ModR/M; op is word/dword offset */ |
|
161 /* NOTE: these are actually RVAs not offsets to seg!! */ |
|
162 /* note bene: 'O' ADDR_METH uses addr_size to |
|
163 determine operand size */ |
|
164 op->type = op_offset; |
|
165 op->flags |= op_pointer; |
|
166 x86_imm_sized( buf, buf_len, &op->data.offset, |
|
167 insn->addr_size ); |
|
168 |
|
169 size = insn->addr_size; |
|
170 break; |
|
171 |
|
172 /* Hard-coded: these are specified in the insn definition */ |
|
173 case ADDRMETH_F: /* EFLAGS register */ |
|
174 op->type = op_register; |
|
175 op->flags |= op_hardcode; |
|
176 ia32_handle_register( &op->data.reg, REG_FLAGS_INDEX ); |
|
177 break; |
|
178 case ADDRMETH_X: /* Memory addressed by DS:SI [string] */ |
|
179 op->type = op_expression; |
|
180 op->flags |= op_hardcode; |
|
181 op->flags |= op_ds_seg | op_pointer | op_string; |
|
182 ia32_handle_register( &op->data.expression.base, |
|
183 REG_DWORD_OFFSET + 6 ); |
|
184 break; |
|
185 case ADDRMETH_Y: /* Memory addressed by ES:DI [string] */ |
|
186 op->type = op_expression; |
|
187 op->flags |= op_hardcode; |
|
188 op->flags |= op_es_seg | op_pointer | op_string; |
|
189 ia32_handle_register( &op->data.expression.base, |
|
190 REG_DWORD_OFFSET + 7 ); |
|
191 break; |
|
192 case ADDRMETH_RR: /* Gen Register hard-coded in opcode */ |
|
193 op->type = op_register; |
|
194 op->flags |= op_hardcode; |
|
195 ia32_handle_register( &op->data.reg, |
|
196 op_value + gen_regs ); |
|
197 break; |
|
198 case ADDRMETH_RS: /* Seg Register hard-coded in opcode */ |
|
199 op->type = op_register; |
|
200 op->flags |= op_hardcode; |
|
201 ia32_handle_register( &op->data.reg, |
|
202 op_value + REG_SEG_OFFSET ); |
|
203 break; |
|
204 case ADDRMETH_RF: /* FPU Register hard-coded in opcode */ |
|
205 op->type = op_register; |
|
206 op->flags |= op_hardcode; |
|
207 ia32_handle_register( &op->data.reg, |
|
208 op_value + REG_FPU_OFFSET ); |
|
209 break; |
|
210 case ADDRMETH_RT: /* TST Register hard-coded in opcode */ |
|
211 op->type = op_register; |
|
212 op->flags |= op_hardcode; |
|
213 ia32_handle_register( &op->data.reg, |
|
214 op_value + REG_TEST_OFFSET ); |
|
215 break; |
|
216 case ADDRMETH_II: /* Immediate hard-coded in opcode */ |
|
217 op->type = op_immediate; |
|
218 op->data.dword = op_value; |
|
219 op->flags |= op_hardcode; |
|
220 break; |
|
221 |
|
222 case 0: /* Operand is not used */ |
|
223 default: |
|
224 /* ignore -- operand not used in this insn */ |
|
225 op->type = op_unused; /* this shouldn't happen! */ |
|
226 break; |
|
227 } |
|
228 |
|
229 return size; |
|
230 } |
|
231 |
|
232 static size_t decode_operand_size( unsigned int op_type, x86_insn_t *insn, |
|
233 x86_op_t *op ){ |
|
234 size_t size; |
|
235 |
|
236 /* ++ Do Operand Type ++ */ |
|
237 switch (op_type) { |
|
238 /* This sets the operand Size based on the Intel Opcode Map |
|
239 * (Vol 2, Appendix A). Letter encodings are from section |
|
240 * A.1.2, 'Codes for Operand Type' */ |
|
241 /* NOTE: in this routines, 'size' refers to the size |
|
242 * of the operand in the raw (encoded) instruction; |
|
243 * 'datatype' stores the actual size and datatype |
|
244 * of the operand */ |
|
245 |
|
246 /* ------------------------ Operand Type ----------------- */ |
|
247 case OPTYPE_c: /* byte or word [op size attr] */ |
|
248 size = (insn->op_size == 4) ? 2 : 1; |
|
249 op->datatype = (size == 4) ? op_word : op_byte; |
|
250 break; |
|
251 case OPTYPE_a: /* 2 word or 2 dword [op size attr] */ |
|
252 /* pointer to a 16:16 or 32:32 BOUNDS operand */ |
|
253 size = (insn->op_size == 4) ? 8 : 4; |
|
254 op->datatype = (size == 4) ? op_bounds32 : op_bounds16; |
|
255 break; |
|
256 case OPTYPE_v: /* word or dword [op size attr] */ |
|
257 size = (insn->op_size == 4) ? 4 : 2; |
|
258 op->datatype = (size == 4) ? op_dword : op_word; |
|
259 break; |
|
260 case OPTYPE_p: /* 32/48-bit ptr [op size attr] */ |
|
261 /* technically these flags are not accurate: the |
|
262 * value s a 16:16 pointer or a 16:32 pointer, where |
|
263 * the first '16' is a segment */ |
|
264 size = (insn->addr_size == 4) ? 6 : 4; |
|
265 op->datatype = (size == 4) ? op_descr32 : op_descr16; |
|
266 break; |
|
267 case OPTYPE_b: /* byte, ignore op-size */ |
|
268 size = 1; |
|
269 op->datatype = op_byte; |
|
270 break; |
|
271 case OPTYPE_w: /* word, ignore op-size */ |
|
272 size = 2; |
|
273 op->datatype = op_word; |
|
274 break; |
|
275 case OPTYPE_d: /* dword , ignore op-size */ |
|
276 size = 4; |
|
277 op->datatype = op_dword; |
|
278 break; |
|
279 case OPTYPE_s: /* 6-byte psuedo-descriptor */ |
|
280 /* ptr to 6-byte value which is 32:16 in 32-bit |
|
281 * mode, or 8:24:16 in 16-bit mode. The high byte |
|
282 * is ignored in 16-bit mode. */ |
|
283 size = 6; |
|
284 op->datatype = (insn->addr_size == 4) ? |
|
285 op_pdescr32 : op_pdescr16; |
|
286 break; |
|
287 case OPTYPE_q: /* qword, ignore op-size */ |
|
288 size = 8; |
|
289 op->datatype = op_qword; |
|
290 break; |
|
291 case OPTYPE_dq: /* d-qword, ignore op-size */ |
|
292 size = 16; |
|
293 op->datatype = op_dqword; |
|
294 break; |
|
295 case OPTYPE_ps: /* 128-bit FP data */ |
|
296 size = 16; |
|
297 /* really this is 4 packed SP FP values */ |
|
298 op->datatype = op_ssimd; |
|
299 break; |
|
300 case OPTYPE_pd: /* 128-bit FP data */ |
|
301 size = 16; |
|
302 /* really this is 2 packed DP FP values */ |
|
303 op->datatype = op_dsimd; |
|
304 break; |
|
305 case OPTYPE_ss: /* Scalar elem of 128-bit FP data */ |
|
306 size = 16; |
|
307 /* this only looks at the low dword (4 bytes) |
|
308 * of the xmmm register passed as a param. |
|
309 * This is a 16-byte register where only 4 bytes |
|
310 * are used in the insn. Painful, ain't it? */ |
|
311 op->datatype = op_sssimd; |
|
312 break; |
|
313 case OPTYPE_sd: /* Scalar elem of 128-bit FP data */ |
|
314 size = 16; |
|
315 /* this only looks at the low qword (8 bytes) |
|
316 * of the xmmm register passed as a param. |
|
317 * This is a 16-byte register where only 8 bytes |
|
318 * are used in the insn. Painful, again... */ |
|
319 op->datatype = op_sdsimd; |
|
320 break; |
|
321 case OPTYPE_pi: /* qword mmx register */ |
|
322 size = 8; |
|
323 op->datatype = op_qword; |
|
324 break; |
|
325 case OPTYPE_si: /* dword integer register */ |
|
326 size = 4; |
|
327 op->datatype = op_dword; |
|
328 break; |
|
329 case OPTYPE_fs: /* single-real */ |
|
330 size = 4; |
|
331 op->datatype = op_sreal; |
|
332 break; |
|
333 case OPTYPE_fd: /* double real */ |
|
334 size = 8; |
|
335 op->datatype = op_dreal; |
|
336 break; |
|
337 case OPTYPE_fe: /* extended real */ |
|
338 size = 10; |
|
339 op->datatype = op_extreal; |
|
340 break; |
|
341 case OPTYPE_fb: /* packed BCD */ |
|
342 size = 10; |
|
343 op->datatype = op_bcd; |
|
344 break; |
|
345 case OPTYPE_fv: /* pointer to FPU env: 14 or 28-bytes */ |
|
346 size = (insn->addr_size == 4)? 28 : 14; |
|
347 op->datatype = (size == 28)? op_fpuenv32: op_fpuenv16; |
|
348 break; |
|
349 case OPTYPE_ft: /* pointer to FPU env: 94 or 108 bytes */ |
|
350 size = (insn->addr_size == 4)? 108 : 94; |
|
351 op->datatype = (size == 108)? |
|
352 op_fpustate32: op_fpustate16; |
|
353 break; |
|
354 case OPTYPE_fx: /* 512-byte register stack */ |
|
355 size = 512; |
|
356 op->datatype = op_fpregset; |
|
357 break; |
|
358 case OPTYPE_fp: /* floating point register */ |
|
359 size = 10; /* double extended precision */ |
|
360 op->datatype = op_fpreg; |
|
361 break; |
|
362 case OPTYPE_m: /* fake operand type used for "lea Gv, M" */ |
|
363 size = insn->addr_size; |
|
364 op->datatype = (size == 4) ? op_dword : op_word; |
|
365 break; |
|
366 case OPTYPE_none: /* handle weird instructions that have no encoding but use a dword datatype, like invlpg */ |
|
367 size = 0; |
|
368 op->datatype = op_none; |
|
369 break; |
|
370 case 0: |
|
371 default: |
|
372 size = insn->op_size; |
|
373 op->datatype = (size == 4) ? op_dword : op_word; |
|
374 break; |
|
375 } |
|
376 return size; |
|
377 } |
|
378 |
|
379 size_t ia32_decode_operand( unsigned char *buf, size_t buf_len, |
|
380 x86_insn_t *insn, unsigned int raw_op, |
|
381 unsigned int raw_flags, unsigned int prefixes, |
|
382 unsigned char modrm ) { |
|
383 unsigned int addr_meth, op_type, op_size, gen_regs; |
|
384 x86_op_t *op; |
|
385 size_t size; |
|
386 |
|
387 /* ++ Yank optype and addr mode out of operand flags */ |
|
388 addr_meth = raw_flags & ADDRMETH_MASK; |
|
389 op_type = raw_flags & OPTYPE_MASK; |
|
390 |
|
391 if ( raw_flags == ARG_NONE ) { |
|
392 /* operand is not used in this instruction */ |
|
393 return 0; |
|
394 } |
|
395 |
|
396 /* allocate a new operand */ |
|
397 op = x86_operand_new( insn ); |
|
398 |
|
399 /* ++ Copy flags from opcode table to x86_insn_t */ |
|
400 op->access = (enum x86_op_access) OP_PERM(raw_flags); |
|
401 op->flags = (enum x86_op_flags) (OP_FLAGS(raw_flags) >> 12); |
|
402 |
|
403 /* Get size (for decoding) and datatype of operand */ |
|
404 op_size = decode_operand_size(op_type, insn, op); |
|
405 |
|
406 /* override default register set based on Operand Type */ |
|
407 /* this allows mixing of 8, 16, and 32 bit regs in insn */ |
|
408 if (op_size == 1) { |
|
409 gen_regs = REG_BYTE_OFFSET; |
|
410 } else if (op_size == 2) { |
|
411 gen_regs = REG_WORD_OFFSET; |
|
412 } else { |
|
413 gen_regs = REG_DWORD_OFFSET; |
|
414 } |
|
415 |
|
416 size = decode_operand_value( buf, buf_len, op, insn, addr_meth, |
|
417 op_size, raw_op, modrm, gen_regs ); |
|
418 |
|
419 /* if operand is an address, apply any segment override prefixes */ |
|
420 if ( op->type == op_expression || op->type == op_offset ) { |
|
421 apply_seg(op, prefixes); |
|
422 } |
|
423 |
|
424 return size; /* return number of bytes in instruction */ |
|
425 } |