|
1 #ifndef LIBDISASM_H |
|
2 #define LIBDISASM_H |
|
3 |
|
4 #ifdef WIN32 |
|
5 #include <windows.h> |
|
6 #endif |
|
7 |
|
8 #include <stdint.h> |
|
9 |
|
/* 'NEW' types
 * __________________________________________________________________________*/
|
/* qword_t: 64-bit signed integer used for quad-word operand data.
 * The separate LIBDISASM_QWORD_H guard keeps this definition from
 * interfering with qword.h. */
#if !defined(LIBDISASM_QWORD_H)
#define LIBDISASM_QWORD_H
#  if defined(_MSC_VER)
typedef __int64 qword_t;
#  else
typedef int64_t qword_t;
#  endif
#endif
|
20 |
|
21 #include <sys/types.h> |
|
22 |
|
23 #ifdef __cplusplus |
|
24 extern "C" { |
|
25 #endif |
|
26 |
|
/* 'NEW' x86 API
 * __________________________________________________________________________*/
|
29 |
|
30 |
|
/* ========================================= Error Reporting */
/* REPORT CODES
 *   These are handed to the reporter function supplied at initialization.
 *   Each code determines the type of the 'data' argument passed to the
 *   reporter; this lets the reporter recover from errors, or just log them.
 */
enum x86_report_codes {
    /* RVA OUT OF BOUNDS: the disassembler could not disassemble the
     * supplied RVA because it is outside the range of the buffer. The
     * application should store the address, work out which section of the
     * binary it belongs to, and then disassemble the address from the
     * bytes in that section.
     *   data: uint32_t rva */
    report_disasm_bounds = 0,

    /* INSTRUCTION OUT OF BOUNDS: the disassembler could not disassemble
     * the instruction because it would require bytes beyond the end of
     * the current buffer. This usually indicates garbage bytes at the end
     * of a buffer, or an incorrectly-sized buffer.
     *   data: uint32_t rva */
    report_insn_bounds = 1,

    /* INVALID INSTRUCTION: the disassembler could not disassemble the
     * instruction because it has an invalid combination of opcodes and
     * operands. This stops automated disassembly; the application can
     * restart the disassembly after the invalid instruction.
     *   data: uint32_t rva */
    report_invalid_insn = 2,

    report_unknown = 3
};
|
62 |
|
/* Reporter callback type.
 *   code : supplied by libdisasm; one of enum x86_report_codes
 *   data : supplied by libdisasm; context-specific, as documented for
 *          each report code
 *   arg  : optional arbitrary data provided by the code that registered
 *          the callback -- for example 'this' or 'self' in OOP code */
typedef void (*DISASM_REPORTER)(enum x86_report_codes code,
                                void *data,
                                void *arg);


/* x86_report_error : call the registered reporter to report an error */
void x86_report_error(enum x86_report_codes code, void *data);
|
73 |
|
/* ========================================= Libdisasm Management Routines */
/* Disassembler options; these can be ORed together. */
enum x86_options {
    opt_none          = 0,
    opt_ignore_nulls  = 1 << 0,    /* ignore sequences of > 4 NULL bytes */
    opt_16_bit        = 1 << 1,    /* 16-bit/DOS disassembly */
    opt_att_mnemonics = 1 << 2     /* use AT&T syntax names for alternate
                                    * opcode mnemonics */
};
|
81 |
|
82 /* management routines */ |
|
83 /* 'arg' is caller-specific data which is passed as the first argument |
|
84 * to the reporter callback routine */ |
|
85 int x86_init( enum x86_options options, DISASM_REPORTER reporter, void *arg); |
|
86 void x86_set_reporter( DISASM_REPORTER reporter, void *arg); |
|
87 void x86_set_options( enum x86_options options ); |
|
88 enum x86_options x86_get_options( void ); |
|
89 int x86_cleanup(void); |
|
90 |
|
91 |
|
/* ========================================= Instruction Representation */
/* These defines are only intended for use in the array declarations. */
#define MAX_REGNAME          8

#define MAX_PREFIX_STR       32
#define MAX_MNEM_STR         16
#define MAX_INSN_SIZE        20      /* same as in i386.h */
#define MAX_OP_STRING        32      /* max possible operand size in string form */
#define MAX_OP_RAW_STRING    64      /* max possible operand size in raw form */
#define MAX_OP_XML_STRING    256     /* max possible operand size in xml form */
#define MAX_NUM_OPERANDS     8       /* max # implicit and explicit operands */
/* In the following, the '2 *' factor is arbitrary: the max # of operands
 * should require more space than the rest of the insn. */
#define MAX_INSN_STRING      512     /* 2 * 8 * MAX_OP_STRING */
#define MAX_INSN_RAW_STRING  1024    /* 2 * 8 * MAX_OP_RAW_STRING */
#define MAX_INSN_XML_STRING  4096    /* 2 * 8 * MAX_OP_XML_STRING */
|
108 |
|
/* Register roles. NOTE: these may be ORed together.
 * Bit 15 (0x08000) is not assigned. */
enum x86_reg_type {
    reg_gen     = 1 << 0,     /* general purpose */
    reg_in      = 1 << 1,     /* incoming args, ala RISC */
    reg_out     = 1 << 2,     /* args to calls, ala RISC */
    reg_local   = 1 << 3,     /* local vars, ala RISC */
    reg_fpu     = 1 << 4,     /* FPU data register */
    reg_seg     = 1 << 5,     /* segment register */
    reg_simd    = 1 << 6,     /* SIMD/MMX reg */
    reg_sys     = 1 << 7,     /* restricted/system register */
    reg_sp      = 1 << 8,     /* stack pointer */
    reg_fp      = 1 << 9,     /* frame pointer */
    reg_pc      = 1 << 10,    /* program counter */
    reg_retaddr = 1 << 11,    /* return addr for func */
    reg_cond    = 1 << 12,    /* condition code / flags */
    reg_zero    = 1 << 13,    /* zero register, ala RISC */
    reg_ret     = 1 << 14,    /* return value */
    reg_src     = 1 << 16,    /* array/rep source */
    reg_dest    = 1 << 17,    /* array/rep destination */
    reg_count   = 1 << 18     /* array/rep/loop counter */
};
|
129 |
|
130 /* x86_reg_t : an X86 CPU register */ |
|
131 typedef struct { |
|
132 char name[MAX_REGNAME]; |
|
133 enum x86_reg_type type; /* what register is used for */ |
|
134 unsigned int size; /* size of register in bytes */ |
|
135 unsigned int id; /* register ID #, for quick compares */ |
|
136 unsigned int alias; /* ID of reg this is an alias for */ |
|
137 unsigned int shift; /* amount to shift aliased reg by */ |
|
138 } x86_reg_t; |
|
139 |
|
140 /* x86_ea_t : an X86 effective address (address expression) */ |
|
141 typedef struct { |
|
142 unsigned int scale; /* scale factor */ |
|
143 x86_reg_t index, base; /* index, base registers */ |
|
144 int32_t disp; /* displacement */ |
|
145 char disp_sign; /* is negative? 1/0 */ |
|
146 char disp_size; /* 0, 1, 2, 4 */ |
|
147 } x86_ea_t; |
|
148 |
|
/* x86_absolute_t : an X86 segment:offset address (descriptor) */
typedef struct {
    unsigned short segment;        /* loaded directly into CS */
    union {
        unsigned short off16;      /* loaded directly into IP */
        uint32_t       off32;      /* loaded directly into EIP */
    } offset;
} x86_absolute_t;
|
157 |
|
/* Operand kinds; these are mutually exclusive. */
enum x86_op_type {
    op_unused        = 0,    /* empty/unused operand: should never occur */
    op_register      = 1,    /* CPU register */
    op_immediate     = 2,    /* Immediate Value */
    op_relative_near = 3,    /* Relative offset from IP */
    op_relative_far  = 4,    /* Relative offset from IP */
    op_absolute      = 5,    /* Absolute address (ptr16:32) */
    op_expression    = 6,    /* Address expression (scale/index/base/disp) */
    op_offset        = 7,    /* Offset from start of segment (m32) */
    op_unknown
};

/* Classification helpers for enum x86_op_type values.
 * The parameter is parenthesized at every use so that compound arguments
 * (e.g. a conditional expression) are grouped correctly. The argument is
 * still evaluated twice, so do not pass expressions with side effects.
 *
 *   x86_optype_is_address  : op_absolute or op_offset
 *   x86_optype_is_relative : op_relative_near or op_relative_far
 *   x86_optype_is_memory   : anything strictly between op_immediate and
 *                            op_unknown (this includes the relative types)
 */
#define x86_optype_is_address( optype ) \
    ( (optype) == op_absolute || (optype) == op_offset )
#define x86_optype_is_relative( optype ) \
    ( (optype) == op_relative_near || (optype) == op_relative_far )
#define x86_optype_is_memory( optype ) \
    ( (optype) > op_immediate && (optype) < op_unknown )
|
176 |
|
/* Operand data types; these use Intel's lame terminology. */
enum x86_op_datatype {
    op_byte       = 1,     /* 1 byte integer */
    op_word       = 2,     /* 2 byte integer */
    op_dword      = 3,     /* 4 byte integer */
    op_qword      = 4,     /* 8 byte integer */
    op_dqword     = 5,     /* 16 byte integer */
    op_sreal      = 6,     /* 4 byte real (single real) */
    op_dreal      = 7,     /* 8 byte real (double real) */
    op_extreal    = 8,     /* 10 byte real (extended real) */
    op_bcd        = 9,     /* 10 byte binary-coded decimal */
    op_ssimd      = 10,    /* 16 byte : 4 packed single FP (SIMD, MMX) */
    op_dsimd      = 11,    /* 16 byte : 2 packed double FP (SIMD, MMX) */
    op_sssimd     = 12,    /* 4 byte : scalar single FP (SIMD, MMX) */
    op_sdsimd     = 13,    /* 8 byte : scalar double FP (SIMD, MMX) */
    op_descr32    = 14,    /* 6 byte Intel descriptor 2:4 */
    op_descr16    = 15,    /* 4 byte Intel descriptor 2:2 */
    op_pdescr32   = 16,    /* 6 byte Intel pseudo-descriptor 32:16 */
    op_pdescr16   = 17,    /* 6 byte Intel pseudo-descriptor 8:24:16 */
    op_bounds16   = 18,    /* signed 16:16 lower:upper bounds */
    op_bounds32   = 19,    /* signed 32:32 lower:upper bounds */
    op_fpuenv16   = 20,    /* 14 byte FPU control/environment data */
    op_fpuenv32   = 21,    /* 28 byte FPU control/environment data */
    op_fpustate16 = 22,    /* 94 byte FPU state (env & reg stack) */
    op_fpustate32 = 23,    /* 108 byte FPU state (env & reg stack) */
    op_fpregset   = 24,    /* 512 bytes: register set */
    op_fpreg      = 25,    /* FPU register */
    op_none       = 0xFF   /* operand without a datatype (INVLPG) */
};
|
205 |
|
/* Operand access modes; ORed together. */
enum x86_op_access {
    op_read    = 1 << 0,
    op_write   = 1 << 1,
    op_execute = 1 << 2
};
|
211 |
|
/* Miscellaneous operand flags. The low flags are ORed together; the
 * segment override values are mutually exclusive. */
enum x86_op_flags {
    op_signed   = 0x001,    /* signed integer */
    op_string   = 0x002,    /* possible string or array */
    op_constant = 0x004,    /* symbolic constant */
    op_pointer  = 0x008,    /* operand points to a memory address */
    op_sysref   = 0x010,    /* operand is a syscall number */
    op_implied  = 0x020,    /* operand is implicit in the insn */
    op_hardcode = 0x040,    /* operand is hardcoded in insn definition */
    /* NOTE: an 'implied' operand is one which can be considered a side
     *       effect of the insn, e.g. %esp being modified by PUSH or POP. A
     *       'hard-coded' operand is one which is specified in the
     *       instruction definition, e.g. %es:%edi in MOVSB or 1 in
     *       ROL Eb, 1. The difference is that hard-coded operands are
     *       printed by disassemblers and are required to re-assemble,
     *       while implicit operands are invisible. */
    op_es_seg   = 0x100,    /* ES segment override */
    op_cs_seg   = 0x200,    /* CS segment override */
    op_ss_seg   = 0x300,    /* SS segment override */
    op_ds_seg   = 0x400,    /* DS segment override */
    op_fs_seg   = 0x500,    /* FS segment override */
    op_gs_seg   = 0x600     /* GS segment override */
};
|
233 |
|
234 /* x86_op_t : an X86 instruction operand */ |
|
235 typedef struct { |
|
236 enum x86_op_type type; /* operand type */ |
|
237 enum x86_op_datatype datatype; /* operand size */ |
|
238 enum x86_op_access access; /* operand access [RWX] */ |
|
239 enum x86_op_flags flags; /* misc flags */ |
|
240 union { |
|
241 /* sizeof will have to work on these union members! */ |
|
242 /* immediate values */ |
|
243 char sbyte; |
|
244 short sword; |
|
245 int32_t sdword; |
|
246 qword_t sqword; |
|
247 unsigned char byte; |
|
248 unsigned short word; |
|
249 uint32_t dword; |
|
250 qword_t qword; |
|
251 float sreal; |
|
252 double dreal; |
|
253 /* misc large/non-native types */ |
|
254 unsigned char extreal[10]; |
|
255 unsigned char bcd[10]; |
|
256 qword_t dqword[2]; |
|
257 unsigned char simd[16]; |
|
258 unsigned char fpuenv[28]; |
|
259 /* offset from segment */ |
|
260 uint32_t offset; |
|
261 /* ID of CPU register */ |
|
262 x86_reg_t reg; |
|
263 /* offsets from current insn */ |
|
264 char relative_near; |
|
265 int32_t relative_far; |
|
266 /* segment:offset */ |
|
267 x86_absolute_t absolute; |
|
268 /* effective address [expression] */ |
|
269 x86_ea_t expression; |
|
270 } data; |
|
271 /* this is needed to make formatting operands more sane */ |
|
272 void * insn; /* pointer to x86_insn_t owning operand */ |
|
273 } x86_op_t; |
|
274 |
|
275 /* Linked list of x86_op_t; provided for manual traversal of the operand |
|
276 * list in an insn. Users wishing to add operands to this list, e.g. to add |
|
277 * implicit operands, should use x86_operand_new in x86_operand_list.h */ |
|
278 typedef struct x86_operand_list { |
|
279 x86_op_t op; |
|
280 struct x86_operand_list *next; |
|
281 } x86_oplist_t; |
|
282 |
|
/* Instruction groups (meta-types). Values 11 and 12 are not assigned. */
enum x86_insn_group {
    insn_none        = 0,     /* invalid instruction */
    insn_controlflow = 1,
    insn_arithmetic  = 2,
    insn_logic       = 3,
    insn_stack       = 4,
    insn_comparison  = 5,
    insn_move        = 6,
    insn_string      = 7,
    insn_bit_manip   = 8,
    insn_flag_manip  = 9,
    insn_fpu         = 10,
    insn_interrupt   = 13,
    insn_system      = 14,
    insn_other       = 15
};
|
299 |
|
/* Instruction types. In every value listed here the top hex digit equals
 * the numeric value of the matching enum x86_insn_group member. */
enum x86_insn_type {
    insn_invalid      = 0,          /* invalid instruction */

    /* insn_controlflow */
    insn_jmp          = 0x1001,
    insn_jcc          = 0x1002,
    insn_call         = 0x1003,
    insn_callcc       = 0x1004,
    insn_return       = 0x1005,

    /* insn_arithmetic */
    insn_add          = 0x2001,
    insn_sub          = 0x2002,
    insn_mul          = 0x2003,
    insn_div          = 0x2004,
    insn_inc          = 0x2005,
    insn_dec          = 0x2006,
    insn_shl          = 0x2007,
    insn_shr          = 0x2008,
    insn_rol          = 0x2009,
    insn_ror          = 0x200A,

    /* insn_logic */
    insn_and          = 0x3001,
    insn_or           = 0x3002,
    insn_xor          = 0x3003,
    insn_not          = 0x3004,
    insn_neg          = 0x3005,

    /* insn_stack */
    insn_push         = 0x4001,
    insn_pop          = 0x4002,
    insn_pushregs     = 0x4003,
    insn_popregs      = 0x4004,
    insn_pushflags    = 0x4005,
    insn_popflags     = 0x4006,
    insn_enter        = 0x4007,
    insn_leave        = 0x4008,

    /* insn_comparison */
    insn_test         = 0x5001,
    insn_cmp          = 0x5002,

    /* insn_move */
    insn_mov          = 0x6001,     /* move */
    insn_movcc        = 0x6002,     /* conditional move */
    insn_xchg         = 0x6003,     /* exchange */
    insn_xchgcc       = 0x6004,     /* conditional exchange */

    /* insn_string */
    insn_strcmp       = 0x7001,
    insn_strload      = 0x7002,
    insn_strmov       = 0x7003,
    insn_strstore     = 0x7004,
    insn_translate    = 0x7005,     /* xlat */

    /* insn_bit_manip */
    insn_bittest      = 0x8001,
    insn_bitset       = 0x8002,
    insn_bitclear     = 0x8003,

    /* insn_flag_manip */
    insn_clear_carry  = 0x9001,
    insn_clear_zero   = 0x9002,
    insn_clear_oflow  = 0x9003,
    insn_clear_dir    = 0x9004,
    insn_clear_sign   = 0x9005,
    insn_clear_parity = 0x9006,
    insn_set_carry    = 0x9007,
    insn_set_zero     = 0x9008,
    insn_set_oflow    = 0x9009,
    insn_set_dir      = 0x900A,
    insn_set_sign     = 0x900B,
    insn_set_parity   = 0x900C,
    insn_tog_carry    = 0x9010,
    insn_tog_zero     = 0x9020,
    insn_tog_oflow    = 0x9030,
    insn_tog_dir      = 0x9040,
    insn_tog_sign     = 0x9050,
    insn_tog_parity   = 0x9060,

    /* insn_fpu */
    insn_fmov         = 0xA001,
    insn_fmovcc       = 0xA002,
    insn_fneg         = 0xA003,
    insn_fabs         = 0xA004,
    insn_fadd         = 0xA005,
    insn_fsub         = 0xA006,
    insn_fmul         = 0xA007,
    insn_fdiv         = 0xA008,
    insn_fsqrt        = 0xA009,
    insn_fcmp         = 0xA00A,
    insn_fcos         = 0xA00C,
    insn_fldpi        = 0xA00D,
    insn_fldz         = 0xA00E,
    insn_ftan         = 0xA00F,
    insn_fsine        = 0xA010,
    insn_fsys         = 0xA020,

    /* insn_interrupt */
    insn_int          = 0xD001,
    insn_intcc        = 0xD002,     /* not present in x86 ISA */
    insn_iret         = 0xD003,
    insn_bound        = 0xD004,
    insn_debug        = 0xD005,
    insn_trace        = 0xD006,
    insn_invalid_op   = 0xD007,
    insn_oflow        = 0xD008,

    /* insn_system */
    insn_halt         = 0xE001,
    insn_in           = 0xE002,     /* input from port/bus */
    insn_out          = 0xE003,     /* output to port/bus */
    insn_cpuid        = 0xE004,

    /* insn_other */
    insn_nop          = 0xF001,
    insn_bcdconv      = 0xF002,     /* convert to or from BCD */
    insn_szconv       = 0xF003      /* change size of operand */
};
|
407 |
|
/* These flags specify special characteristics of the instruction, such as
 * whether the instruction is privileged or whether it serializes the
 * pipeline.
 * NOTE : These may not be accurate for all instructions; updates to the
 * opcode tables have not been completed. */
enum x86_insn_note {
    insn_note_ring0    = 1 << 0,    /* Only available in ring 0 */
    insn_note_smm      = 1 << 1,    /* "" in System Management Mode */
    insn_note_serial   = 1 << 2,    /* Serializing instruction */
    insn_note_nonswap  = 1 << 3,    /* Does not swap arguments in att-style
                                     * formatting */
    insn_note_nosuffix = 1 << 4     /* Does not have size suffix in
                                     * att-style formatting */
};
|
420 |
|
/* This specifies what effects the instruction has on the %eflags register */
enum x86_flag_status {
    insn_carry_set                 = 1 << 0,     /* CF */
    insn_zero_set                  = 1 << 1,     /* ZF */
    insn_oflow_set                 = 1 << 2,     /* OF */
    insn_dir_set                   = 1 << 3,     /* DF */
    insn_sign_set                  = 1 << 4,     /* SF */
    insn_parity_set                = 1 << 5,     /* PF */
    insn_carry_or_zero_set         = 1 << 6,
    insn_zero_set_or_sign_ne_oflow = 1 << 7,
    insn_carry_clear               = 1 << 8,
    insn_zero_clear                = 1 << 9,
    insn_oflow_clear               = 1 << 10,
    insn_dir_clear                 = 1 << 11,
    insn_sign_clear                = 1 << 12,
    insn_parity_clear              = 1 << 13,
    insn_sign_eq_oflow             = 1 << 14,
    insn_sign_ne_oflow             = 1 << 15
};
|
440 |
|
/* The CPU model in which the instruction first appeared; this can be used
 * to mask out instructions appearing in earlier or later models or to
 * check the portability of a binary.
 * NOTE : These may not be accurate for all instructions; updates to the
 * opcode tables have not been completed. */
enum x86_insn_cpu {
    /* Intel */
    cpu_8086       = 1,
    cpu_80286      = 2,
    cpu_80386      = 3,
    cpu_80387      = 4,
    cpu_80486      = 5,
    cpu_pentium    = 6,
    cpu_pentiumpro = 7,
    cpu_pentium2   = 8,
    cpu_pentium3   = 9,
    cpu_pentium4   = 10,
    /* AMD */
    cpu_k6         = 16,
    cpu_k7         = 32,
    cpu_athlon     = 48
};
|
461 |
|
/* CPU ISA subsets: These are derived from the Instruction Groups in
 * Intel Vol 1 Chapter 5; they represent subsets of the IA32 ISA but
 * do not reflect the 'type' of the instruction in the same way that
 * x86_insn_group does. In short, these are AMD/Intel's somewhat useless
 * designations.
 * NOTE : These may not be accurate for all instructions; updates to the
 * opcode tables have not been completed. */
enum x86_insn_isa {
    isa_gp     = 1,    /* general purpose */
    isa_fp     = 2,    /* floating point */
    isa_fpumgt = 3,    /* FPU/SIMD management */
    isa_mmx    = 4,    /* Intel MMX */
    isa_sse1   = 5,    /* Intel SSE SIMD */
    isa_sse2   = 6,    /* Intel SSE2 SIMD */
    isa_sse3   = 7,    /* Intel SSE3 SIMD */
    isa_3dnow  = 8,    /* AMD 3DNow! SIMD */
    isa_sys    = 9     /* system instructions */
};
|
480 |
|
/* Instruction prefixes; x86_insn_t stores these ORed together. */
enum x86_insn_prefix {
    insn_no_prefix   = 0,
    insn_rep_zero    = 1 << 0,    /* REPZ and REPE */
    insn_rep_notzero = 1 << 1,    /* REPNZ and REPNE */
    insn_lock        = 1 << 2     /* LOCK: */
};
|
487 |
|
488 /* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */ |
|
489 /* x86_insn_t : an X86 instruction */ |
|
490 typedef struct { |
|
491 /* information about the instruction */ |
|
492 uint32_t addr; /* load address */ |
|
493 uint32_t offset; /* offset into file/buffer */ |
|
494 enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */ |
|
495 enum x86_insn_type type; /* type, e.g. INS_BRANCH */ |
|
496 enum x86_insn_note note; /* note, e.g. RING0 */ |
|
497 unsigned char bytes[MAX_INSN_SIZE]; |
|
498 unsigned char size; /* size of insn in bytes */ |
|
499 /* 16/32-bit mode settings */ |
|
500 unsigned char addr_size; /* default address size : 2 or 4 */ |
|
501 unsigned char op_size; /* default operand size : 2 or 4 */ |
|
502 /* CPU/instruction set */ |
|
503 enum x86_insn_cpu cpu; |
|
504 enum x86_insn_isa isa; |
|
505 /* flags */ |
|
506 enum x86_flag_status flags_set; /* flags set or tested by insn */ |
|
507 enum x86_flag_status flags_tested; |
|
508 /* stack */ |
|
509 unsigned char stack_mod; /* 0 or 1 : is the stack modified? */ |
|
510 int32_t stack_mod_val; /* val stack is modified by if known */ |
|
511 |
|
512 /* the instruction proper */ |
|
513 enum x86_insn_prefix prefix; /* prefixes ORed together */ |
|
514 char prefix_string[MAX_PREFIX_STR]; /* prefixes [might be truncated] */ |
|
515 char mnemonic[MAX_MNEM_STR]; |
|
516 x86_oplist_t *operands; /* list of explicit/implicit operands */ |
|
517 size_t operand_count; /* total number of operands */ |
|
518 size_t explicit_count; /* number of explicit operands */ |
|
519 /* convenience fields for user */ |
|
520 void *block; /* code block containing this insn */ |
|
521 void *function; /* function containing this insn */ |
|
522 int tag; /* tag the insn as seen/processed */ |
|
523 } x86_insn_t; |
|
524 |
|
525 |
|
526 /* returns 0 if an instruction is invalid, 1 if valid */ |
|
527 int x86_insn_is_valid( x86_insn_t *insn ); |
|
528 |
|
529 /* DISASSEMBLY ROUTINES |
|
530 * Canonical order of arguments is |
|
531 * (buf, buf_len, buf_rva, offset, len, insn, func, arg, resolve_func) |
|
532 * ...but of course all of these are not used at the same time. |
|
533 */ |
|
534 |
|
535 |
|
536 /* Function prototype for caller-supplied callback routine |
|
537 * These callbacks are intended to process 'insn' further, e.g. by |
|
538 * adding it to a linked list, database, etc */ |
|
539 typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void * arg ); |
|
540 |
|
541 /* Function prototype for caller-supplied address resolver. |
|
542 * This routine is used to determine the rva to disassemble next, given |
|
543 * the 'dest' operand of a jump/call. This allows the caller to resolve |
|
544 * jump/call targets stored in a register or on the stack, and also allows |
|
545 * the caller to prevent endless loops by checking if an address has |
|
546 * already been disassembled. If an address cannot be resolved from the |
|
547 * operand, or if the address has already been disassembled, this routine |
|
548 * should return -1; in all other cases the RVA to be disassembled next |
|
549 * should be returned. */ |
|
550 typedef int32_t (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t * current_insn, |
|
551 void *arg ); |
|
552 |
|
553 |
|
554 /* x86_disasm: Disassemble a single instruction from a buffer of bytes. |
|
555 * Returns size of instruction in bytes. |
|
556 * Caller is responsible for calling x86_oplist_free() on |
|
557 * a reused "insn" to avoid leaking memory when calling this |
|
558 * function repeatedly. |
|
559 * buf : Buffer of bytes to disassemble |
|
560 * buf_len : Length of the buffer |
|
561 * buf_rva : Load address of the start of the buffer |
|
562 * offset : Offset in buffer to disassemble |
|
563 * insn : Structure to fill with disassembled instruction |
|
564 */ |
|
565 unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len, |
|
566 uint32_t buf_rva, unsigned int offset, |
|
567 x86_insn_t * insn ); |
|
568 |
|
569 /* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer, |
|
570 * invoking a callback function each time an instruction |
|
571 * is successfully disassembled. The 'range' refers to the |
|
572 * bytes between 'offset' and 'offset + len' in the buffer; |
|
573 * 'len' is assumed to be less than the length of the buffer. |
|
574 * Returns number of instructions processed. |
|
575 * buf : Buffer of bytes to disassemble (e.g. .text section) |
|
576 * buf_rva : Load address of buffer (e.g. ELF Virtual Address) |
|
577 * offset : Offset in buffer to start disassembly at |
|
578 * len : Number of bytes to disassemble |
|
579 * func : Callback function to invoke (may be NULL) |
|
580 * arg : Arbitrary data to pass to callback (may be NULL) |
|
581 */ |
|
582 unsigned int x86_disasm_range( unsigned char *buf, uint32_t buf_rva, |
|
583 unsigned int offset, unsigned int len, |
|
584 DISASM_CALLBACK func, void *arg ); |
|
585 |
|
586 /* x86_disasm_forward: Flow-of-execution disassembly of the bytes in a buffer, |
|
587 * invoking a callback function each time an instruction |
|
588 * is successfully disassembled. |
|
589 * buf : Buffer to disassemble (e.g. .text section) |
|
590 * buf_len : Number of bytes in buffer |
|
591 * buf_rva : Load address of buffer (e.g. ELF Virtual Address) |
|
592 * offset : Offset in buffer to start disassembly at (e.g. entry point) |
|
593 * func : Callback function to invoke (may be NULL) |
|
594 * arg : Arbitrary data to pass to callback (may be NULL) |
|
595 * resolver: Caller-supplied address resolver. If no resolver is |
|
596 * supplied, a default internal one is used -- however the |
|
597 * internal resolver does NOT catch loops and could end up |
|
598 * disassembling forever.. |
|
599 * r_arg : Arbitrary data to pass to resolver (may be NULL) |
|
600 */ |
|
601 unsigned int x86_disasm_forward( unsigned char *buf, unsigned int buf_len, |
|
602 uint32_t buf_rva, unsigned int offset, |
|
603 DISASM_CALLBACK func, void *arg, |
|
604 DISASM_RESOLVER resolver, void *r_arg ); |
|
605 |
|
606 /* Instruction operands: these are stored as a list of explicit and |
|
607 * implicit operands. It is recommended that the 'foreach' routines |
|
608 * be used to when examining operands for purposes of data flow analysis */ |
|
609 |
|
610 /* Operand FOREACH callback: 'arg' is an abritrary parameter passed to the |
|
611 * foreach routine, 'insn' is the x86_insn_t whose operands are being |
|
612 * iterated over, and 'op' is the current x86_op_t */ |
|
613 typedef void (*x86_operand_fn)(x86_op_t *op, x86_insn_t *insn, void *arg); |
|
614 |
|
/* FOREACH types: these are used to limit the foreach results to
 * operands which match a certain "type" (implicit or explicit)
 * or which are accessed in certain ways (e.g. read or write). Note
 * that this operates on the operand list of a single instruction, so
 * specifying the 'real' operand type (register, memory, etc) is not
 * useful. Note also that by definition Execute Access implies Read
 * Access and implies Not Write Access.
 * The "type" (implicit or explicit) and the access method can
 * be ORed together, e.g. op_wo | op_explicit */
enum x86_op_foreach_type {
    op_any      = 0,       /* ALL operands (explicit, implicit, rwx) */
    op_dest     = 1,       /* operands with Write access */
    op_src      = 2,       /* operands with Read access */
    op_ro       = 3,       /* operands with Read but not Write access */
    op_wo       = 4,       /* operands with Write but not Read access */
    op_xo       = 5,       /* operands with Execute access */
    op_rw       = 6,       /* operands with Read AND Write access */
    op_implicit = 0x10,    /* operands that are implied by the opcode */
    op_explicit = 0x20     /* operands that are not side-effects */
};
|
635 |
|
636 |
|
637 /* free the operand list associated with an instruction -- useful for |
|
638 * preventing memory leaks when free()ing an x86_insn_t */ |
|
639 void x86_oplist_free( x86_insn_t *insn ); |
|
640 |
|
641 /* Operand foreach: invokes 'func' with 'insn' and 'arg' as arguments. The |
|
642 * 'type' parameter is used to select only operands matching specific |
|
643 * criteria. */ |
|
644 int x86_operand_foreach( x86_insn_t *insn, x86_operand_fn func, void *arg, |
|
645 enum x86_op_foreach_type type); |
|
646 |
|
647 /* convenience routine: returns count of operands matching 'type' */ |
|
648 size_t x86_operand_count( x86_insn_t *insn, enum x86_op_foreach_type type ); |
|
649 |
|
650 /* accessor functions for the operands */ |
|
651 x86_op_t * x86_operand_1st( x86_insn_t *insn ); |
|
652 x86_op_t * x86_operand_2nd( x86_insn_t *insn ); |
|
653 x86_op_t * x86_operand_3rd( x86_insn_t *insn ); |
|
654 |
|
/* Compatibility aliases: these allow the libdisasm 2.0 accessor names to
 * still be used. Each one forwards to the positional accessor. */
#define x86_get_dest_operand(insn)  x86_operand_1st(insn)
#define x86_get_src_operand(insn)   x86_operand_2nd(insn)
#define x86_get_imm_operand(insn)   x86_operand_3rd(insn)
|
659 |
|
660 /* get size of operand data in bytes */ |
|
661 unsigned int x86_operand_size( x86_op_t *op ); |
|
662 |
|
663 /* Operand Convenience Routines: the following three routines are common |
|
664 * operations on operands, intended to ease the burden of the programmer. */ |
|
665 |
|
666 /* Get Address: return the value of an offset operand, or the offset of |
|
667 * a segment:offset absolute address */ |
|
668 uint32_t x86_get_address( x86_insn_t *insn ); |
|
669 |
|
670 /* Get Relative Offset: return as a sign-extended int32_t the near or far |
|
671 * relative offset operand, or 0 if there is none. There can be only one |
|
672 * relaive offset operand in an instruction. */ |
|
673 int32_t x86_get_rel_offset( x86_insn_t *insn ); |
|
674 |
|
675 /* Get Branch Target: return the x86_op_t containing the target of |
|
676 * a jump or call operand, or NULL if there is no branch target. |
|
677 * Internally, a 'branch target' is defined as any operand with |
|
678 * Execute Access set. There can be only one branch target per instruction. */ |
|
679 x86_op_t * x86_get_branch_target( x86_insn_t *insn ); |
|
680 |
|
681 /* Get Immediate: return the x86_op_t containing the immediate operand |
|
682 * for this instruction, or NULL if there is no immediate operand. There |
|
683 * can be only one immediate operand per instruction */ |
|
684 x86_op_t * x86_get_imm( x86_insn_t *insn ); |
|
685 |
|
686 /* Get Raw Immediate Data: returns a pointer to the immediate data encoded |
|
687 * in the instruction. This is useful for large data types [>32 bits] currently |
|
688 * not supported by libdisasm, or for determining if the disassembler |
|
689 * screwed up the conversion of the immediate data. Note that 'imm' in this |
|
690 * context refers to immediate data encoded at the end of an instruction as |
|
691 * detailed in the Intel Manual Vol II Chapter 2; it does not refer to the |
|
692 * 'op_imm' operand (the third operand in instructions like 'mul' */ |
|
693 unsigned char * x86_get_raw_imm( x86_insn_t *insn ); |
|
694 |
|
695 |
|
696 /* More accessor fuctions, this time for user-defined info... */ |
|
697 /* set the address (usually RVA) of the insn */ |
|
698 void x86_set_insn_addr( x86_insn_t *insn, uint32_t addr ); |
|
699 |
|
700 /* set the offset (usually offset into file) of the insn */ |
|
701 void x86_set_insn_offset( x86_insn_t *insn, unsigned int offset ); |
|
702 |
|
703 /* set a pointer to the function owning the instruction. The |
|
704 * type of 'func' is user-defined; libdisasm does not use the func field. */ |
|
705 void x86_set_insn_function( x86_insn_t *insn, void * func ); |
|
706 |
|
707 /* set a pointer to the block of code owning the instruction. The |
|
708 * type of 'block' is user-defined; libdisasm does not use the block field. */ |
|
709 void x86_set_insn_block( x86_insn_t *insn, void * block ); |
|
710 |
|
711 /* instruction tagging: these routines allow the programmer to mark |
|
712 * instructions as "seen" in a DFS, for example. libdisasm does not use |
|
713 * the tag field.*/ |
|
714 /* set insn->tag to 1 */ |
|
715 void x86_tag_insn( x86_insn_t *insn ); |
|
716 /* set insn->tag to 0 */ |
|
717 void x86_untag_insn( x86_insn_t *insn ); |
|
718 /* return insn->tag */ |
|
719 int x86_insn_is_tagged( x86_insn_t *insn ); |
|
720 |
|
721 |
|
/* Disassembly formats:
 *      AT&T is standard AS/GAS-style: "mnemonic\tsrc, dest, imm"
 *      Intel is standard MASM/NASM/TASM: "mnemonic\tdest,src, imm"
 *      Native is tab-delimited: "RVA\tbytes\tmnemonic\tdest\tsrc\timm"
 *      XML is your typical <insn> ... </insn>
 *      Raw is addr|offset|size|bytes|prefix... see libdisasm_formats.7
 */
enum x86_asm_format {
    unknown_syntax = 0,    /* never use! */
    native_syntax  = 1,    /* header: 35 bytes */
    intel_syntax   = 2,    /* header: 23 bytes */
    att_syntax     = 3,    /* header: 23 bytes */
    xml_syntax     = 4,    /* header: 679 bytes */
    raw_syntax     = 5     /* header: 172 bytes */
};
|
737 |
|
738 /* format (sprintf) an operand into 'buf' using specified syntax */ |
|
739 int x86_format_operand(x86_op_t *op, char *buf, int len, |
|
740 enum x86_asm_format format); |
|
741 |
|
742 /* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */ |
|
743 int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len, |
|
744 enum x86_asm_format format); |
|
745 |
|
746 /* format (sprintf) an instruction into 'buf' using specified syntax; |
|
747 * this includes formatting all operands */ |
|
748 int x86_format_insn(x86_insn_t *insn, char *buf, int len, enum x86_asm_format); |
|
749 |
|
750 /* fill 'buf' with a description of the format's syntax */ |
|
751 int x86_format_header( char *buf, int len, enum x86_asm_format format); |
|
752 |
|
/* Endianness of an x86 CPU: 0 is big, 1 is little; always returns 1. */
unsigned int x86_endian(void);

/* Default address size and default operand size, both in bytes. */
unsigned int x86_addr_size(void);
unsigned int x86_op_size(void);

/* Size of a machine word, in bytes. */
unsigned int x86_word_size(void);

/* Maximum size of a code instruction. x86_max_inst_size is an alternate
 * spelling that simply forwards to x86_max_insn_size. */
unsigned int x86_max_insn_size(void);
#define x86_max_inst_size(x) x86_max_insn_size(x)

/* Register IDs of the Stack pointer, Frame pointer, Instruction pointer
 * and Flags register, respectively. */
unsigned int x86_sp_reg(void);
unsigned int x86_fp_reg(void);
unsigned int x86_ip_reg(void);
unsigned int x86_flag_reg(void);
|
772 |
|
773 /* fill 'reg' struct with details of register 'id' */ |
|
774 void x86_reg_from_id( unsigned int id, x86_reg_t * reg ); |
|
775 |
|
/* convenience macro demonstrating how to get an aliased register; proto is
 *   void x86_get_aliased_reg( x86_reg_t *alias_reg, x86_reg_t *output_reg )
 * where 'alias_reg' is a reg operand and 'output_reg' is filled with the
 * register that the operand is an alias for.
 *
 * Both arguments are parenthesized in the expansion so that expression
 * arguments (e.g. 'regs + 1' or '&op->data.reg') bind as intended: without
 * the parentheses '->' would apply only to the last token of 'alias_reg'. */
#define x86_get_aliased_reg( alias_reg, output_reg )			\
	x86_reg_from_id( (alias_reg)->alias, (output_reg) )
|
782 |
|
783 |
|
/* ================================== Invariant Instruction Representation */
/* Invariant instructions are used for generating binary signatures:
 * the instruction is rewritten so that every variant byte in it is
 * replaced with a wildcard byte.
 *
 * A 'variant byte' is one that is expected to be modified by either the
 * static or the dynamic linker -- for example, an address encoded in an
 * instruction.
 *
 * By comparing the invariant representation of one instruction [or of a
 * sequence of instructions] with the invariant representation of another,
 * one can determine whether the two invariant representations come from
 * the same relocatable object [.o] file. Binary signatures [which are just
 * sequences of invariant instruction representations] can therefore be
 * used to look for library routines that have been statically linked into
 * a binary.
 *
 * The invariant routines are faster and smaller than the disassembly
 * routines; they can be used to determine the size of an instruction
 * without the overhead of a full instruction disassembly.
 */

/* The byte value substituted for each variant byte. */
#define X86_WILDCARD_BYTE 0xF4
|
807 |
|
808 typedef struct { |
|
809 enum x86_op_type type; /* operand type */ |
|
810 enum x86_op_datatype datatype; /* operand size */ |
|
811 enum x86_op_access access; /* operand access [RWX] */ |
|
812 enum x86_op_flags flags; /* misc flags */ |
|
813 } x86_invariant_op_t; |
|
814 |
|
815 typedef struct { |
|
816 unsigned char bytes[64]; /* invariant representation */ |
|
817 unsigned int size; /* number of bytes in insn */ |
|
818 enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */ |
|
819 enum x86_insn_type type; /* type, e.g. INS_BRANCH */ |
|
820 x86_invariant_op_t operands[3]; /* operands: dest, src, imm */ |
|
821 } x86_invariant_t; |
|
822 |
|
823 |
|
824 /* return a version of the instruction with the variant bytes masked out */ |
|
825 size_t x86_invariant_disasm( unsigned char *buf, int buf_len, |
|
826 x86_invariant_t *inv ); |
|
827 /* return the size in bytes of the intruction pointed to by 'buf'; |
|
828 * this used x86_invariant_disasm since it faster than x86_disasm */ |
|
829 size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len ); |
|
830 |
|
831 #ifdef __cplusplus |
|
832 } |
|
833 #endif |
|
834 |
|
835 |
|
836 #endif |