1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/toolkit/crashreporter/google-breakpad/src/third_party/libdisasm/libdis.h Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,836 @@ 1.4 +#ifndef LIBDISASM_H 1.5 +#define LIBDISASM_H 1.6 + 1.7 +#ifdef WIN32 1.8 +#include <windows.h> 1.9 +#endif 1.10 + 1.11 +#include <stdint.h> 1.12 + 1.13 +/* 'NEW" types 1.14 + * __________________________________________________________________________*/ 1.15 +#ifndef LIBDISASM_QWORD_H /* do not interfere with qword.h */ 1.16 + #define LIBDISASM_QWORD_H 1.17 + #ifdef _MSC_VER 1.18 + typedef __int64 qword_t; 1.19 + #else 1.20 + typedef int64_t qword_t; 1.21 + #endif 1.22 +#endif 1.23 + 1.24 +#include <sys/types.h> 1.25 + 1.26 +#ifdef __cplusplus 1.27 +extern "C" { 1.28 +#endif 1.29 + 1.30 +/* 'NEW" x86 API 1.31 + * __________________________________________________________________________*/ 1.32 + 1.33 + 1.34 +/* ========================================= Error Reporting */ 1.35 +/* REPORT CODES 1.36 + * These are passed to a reporter function passed at initialization. 1.37 + * Each code determines the type of the argument passed to the reporter; 1.38 + * this allows the report to recover from errors, or just log them. 1.39 + */ 1.40 +enum x86_report_codes { 1.41 + report_disasm_bounds, /* RVA OUT OF BOUNDS : The disassembler could 1.42 + not disassemble the supplied RVA as it is 1.43 + out of the range of the buffer. The 1.44 + application should store the address and 1.45 + attempt to determine what section of the 1.46 + binary it is in, then disassemble the 1.47 + address from the bytes in that section. 1.48 + data: uint32_t rva */ 1.49 + report_insn_bounds, /* INSTRUCTION OUT OF BOUNDS: The disassembler 1.50 + could not disassemble the instruction as 1.51 + the instruction would require bytes beyond 1.52 + the end of the current buffer. This usually 1.53 + indicated garbage bytes at the end of a 1.54 + buffer, or an incorrectly-sized buffer. 
1.55 + data: uint32_t rva */ 1.56 + report_invalid_insn, /* INVALID INSTRUCTION: The disassembler could 1.57 + not disassemble the instruction as it has an 1.58 + invalid combination of opcodes and operands. 1.59 + This will stop automated disassembly; the 1.60 + application can restart the disassembly 1.61 + after the invalid instruction. 1.62 + data: uint32_t rva */ 1.63 + report_unknown 1.64 +}; 1.65 + 1.66 +/* 'arg' is optional arbitrary data provided by the code passing the 1.67 + * callback -- for example, it could be 'this' or 'self' in OOP code. 1.68 + * 'code' is provided by libdisasm, it is one of the above 1.69 + * 'data' is provided by libdisasm and is context-specific, per the enums */ 1.70 +typedef void (*DISASM_REPORTER)( enum x86_report_codes code, 1.71 + void *data, void *arg ); 1.72 + 1.73 + 1.74 +/* x86_report_error : Call the register reporter to report an error */ 1.75 +void x86_report_error( enum x86_report_codes code, void *data ); 1.76 + 1.77 +/* ========================================= Libdisasm Management Routines */ 1.78 +enum x86_options { /* these can be ORed together */ 1.79 + opt_none= 0, 1.80 + opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */ 1.81 + opt_16_bit=2, /* 16-bit/DOS disassembly */ 1.82 + opt_att_mnemonics=4, /* use AT&T syntax names for alternate opcode mnemonics */ 1.83 +}; 1.84 + 1.85 +/* management routines */ 1.86 +/* 'arg' is caller-specific data which is passed as the first argument 1.87 + * to the reporter callback routine */ 1.88 +int x86_init( enum x86_options options, DISASM_REPORTER reporter, void *arg); 1.89 +void x86_set_reporter( DISASM_REPORTER reporter, void *arg); 1.90 +void x86_set_options( enum x86_options options ); 1.91 +enum x86_options x86_get_options( void ); 1.92 +int x86_cleanup(void); 1.93 + 1.94 + 1.95 +/* ========================================= Instruction Representation */ 1.96 +/* these defines are only intended for use in the array decl's */ 1.97 +#define MAX_REGNAME 8 1.98 + 
1.99 +#define MAX_PREFIX_STR 32 1.100 +#define MAX_MNEM_STR 16 1.101 +#define MAX_INSN_SIZE 20 /* same as in i386.h */ 1.102 +#define MAX_OP_STRING 32 /* max possible operand size in string form */ 1.103 +#define MAX_OP_RAW_STRING 64 /* max possible operand size in raw form */ 1.104 +#define MAX_OP_XML_STRING 256 /* max possible operand size in xml form */ 1.105 +#define MAX_NUM_OPERANDS 8 /* max # implicit and explicit operands */ 1.106 +/* in these, the '2 *' is arbitrary: the max # of operands should require 1.107 + * more space than the rest of the insn */ 1.108 +#define MAX_INSN_STRING 512 /* 2 * 8 * MAX_OP_STRING */ 1.109 +#define MAX_INSN_RAW_STRING 1024 /* 2 * 8 * MAX_OP_RAW_STRING */ 1.110 +#define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */ 1.111 + 1.112 +enum x86_reg_type { /* NOTE: these may be ORed together */ 1.113 + reg_gen = 0x00001, /* general purpose */ 1.114 + reg_in = 0x00002, /* incoming args, ala RISC */ 1.115 + reg_out = 0x00004, /* args to calls, ala RISC */ 1.116 + reg_local = 0x00008, /* local vars, ala RISC */ 1.117 + reg_fpu = 0x00010, /* FPU data register */ 1.118 + reg_seg = 0x00020, /* segment register */ 1.119 + reg_simd = 0x00040, /* SIMD/MMX reg */ 1.120 + reg_sys = 0x00080, /* restricted/system register */ 1.121 + reg_sp = 0x00100, /* stack pointer */ 1.122 + reg_fp = 0x00200, /* frame pointer */ 1.123 + reg_pc = 0x00400, /* program counter */ 1.124 + reg_retaddr = 0x00800, /* return addr for func */ 1.125 + reg_cond = 0x01000, /* condition code / flags */ 1.126 + reg_zero = 0x02000, /* zero register, ala RISC */ 1.127 + reg_ret = 0x04000, /* return value */ 1.128 + reg_src = 0x10000, /* array/rep source */ 1.129 + reg_dest = 0x20000, /* array/rep destination */ 1.130 + reg_count = 0x40000 /* array/rep/loop counter */ 1.131 +}; 1.132 + 1.133 +/* x86_reg_t : an X86 CPU register */ 1.134 +typedef struct { 1.135 + char name[MAX_REGNAME]; 1.136 + enum x86_reg_type type; /* what register is used for */ 1.137 + unsigned int 
size; /* size of register in bytes */ 1.138 + unsigned int id; /* register ID #, for quick compares */ 1.139 + unsigned int alias; /* ID of reg this is an alias for */ 1.140 + unsigned int shift; /* amount to shift aliased reg by */ 1.141 +} x86_reg_t; 1.142 + 1.143 +/* x86_ea_t : an X86 effective address (address expression) */ 1.144 +typedef struct { 1.145 + unsigned int scale; /* scale factor */ 1.146 + x86_reg_t index, base; /* index, base registers */ 1.147 + int32_t disp; /* displacement */ 1.148 + char disp_sign; /* is negative? 1/0 */ 1.149 + char disp_size; /* 0, 1, 2, 4 */ 1.150 +} x86_ea_t; 1.151 + 1.152 +/* x86_absolute_t : an X86 segment:offset address (descriptor) */ 1.153 +typedef struct { 1.154 + unsigned short segment; /* loaded directly into CS */ 1.155 + union { 1.156 + unsigned short off16; /* loaded directly into IP */ 1.157 + uint32_t off32; /* loaded directly into EIP */ 1.158 + } offset; 1.159 +} x86_absolute_t; 1.160 + 1.161 +enum x86_op_type { /* mutually exclusive */ 1.162 + op_unused = 0, /* empty/unused operand: should never occur */ 1.163 + op_register = 1, /* CPU register */ 1.164 + op_immediate = 2, /* Immediate Value */ 1.165 + op_relative_near = 3, /* Relative offset from IP */ 1.166 + op_relative_far = 4, /* Relative offset from IP */ 1.167 + op_absolute = 5, /* Absolute address (ptr16:32) */ 1.168 + op_expression = 6, /* Address expression (scale/index/base/disp) */ 1.169 + op_offset = 7, /* Offset from start of segment (m32) */ 1.170 + op_unknown 1.171 +}; 1.172 + 1.173 +#define x86_optype_is_address( optype ) \ 1.174 + ( optype == op_absolute || optype == op_offset ) 1.175 +#define x86_optype_is_relative( optype ) \ 1.176 + ( optype == op_relative_near || optype == op_relative_far ) 1.177 +#define x86_optype_is_memory( optype ) \ 1.178 + ( optype > op_immediate && optype < op_unknown ) 1.179 + 1.180 +enum x86_op_datatype { /* these use Intel's lame terminology */ 1.181 + op_byte = 1, /* 1 byte integer */ 1.182 + op_word = 2, /* 
2 byte integer */ 1.183 + op_dword = 3, /* 4 byte integer */ 1.184 + op_qword = 4, /* 8 byte integer */ 1.185 + op_dqword = 5, /* 16 byte integer */ 1.186 + op_sreal = 6, /* 4 byte real (single real) */ 1.187 + op_dreal = 7, /* 8 byte real (double real) */ 1.188 + op_extreal = 8, /* 10 byte real (extended real) */ 1.189 + op_bcd = 9, /* 10 byte binary-coded decimal */ 1.190 + op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */ 1.191 + op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */ 1.192 + op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */ 1.193 + op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */ 1.194 + op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */ 1.195 + op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */ 1.196 + op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */ 1.197 + op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */ 1.198 + op_bounds16 = 18, /* signed 16:16 lower:upper bounds */ 1.199 + op_bounds32 = 19, /* signed 32:32 lower:upper bounds */ 1.200 + op_fpuenv16 = 20, /* 14 byte FPU control/environment data */ 1.201 + op_fpuenv32 = 21, /* 28 byte FPU control/environment data */ 1.202 + op_fpustate16 = 22, /* 94 byte FPU state (env & reg stack) */ 1.203 + op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */ 1.204 + op_fpregset = 24, /* 512 bytes: register set */ 1.205 + op_fpreg = 25, /* FPU register */ 1.206 + op_none = 0xFF, /* operand without a datatype (INVLPG) */ 1.207 +}; 1.208 + 1.209 +enum x86_op_access { /* ORed together */ 1.210 + op_read = 1, 1.211 + op_write = 2, 1.212 + op_execute = 4 1.213 +}; 1.214 + 1.215 +enum x86_op_flags { /* ORed together, but segs are mutually exclusive */ 1.216 + op_signed = 1, /* signed integer */ 1.217 + op_string = 2, /* possible string or array */ 1.218 + op_constant = 4, /* symbolic constant */ 1.219 + op_pointer = 8, /* operand points to a memory address */ 1.220 + op_sysref = 0x010, /* operand is a syscall number */ 1.221 + op_implied = 
0x020, /* operand is implicit in the insn */ 1.222 + op_hardcode = 0x40, /* operand is hardcoded in insn definition */ 1.223 + /* NOTE: an 'implied' operand is one which can be considered a side 1.224 + * effect of the insn, e.g. %esp being modified by PUSH or POP. A 1.225 + * 'hard-coded' operand is one which is specified in the instruction 1.226 + * definition, e.g. %es:%edi in MOVSB or 1 in ROL Eb, 1. The difference 1.227 + * is that hard-coded operands are printed by disassemblers and are 1.228 + * required to re-assemble, while implicit operands are invisible. */ 1.229 + op_es_seg = 0x100, /* ES segment override */ 1.230 + op_cs_seg = 0x200, /* CS segment override */ 1.231 + op_ss_seg = 0x300, /* SS segment override */ 1.232 + op_ds_seg = 0x400, /* DS segment override */ 1.233 + op_fs_seg = 0x500, /* FS segment override */ 1.234 + op_gs_seg = 0x600 /* GS segment override */ 1.235 +}; 1.236 + 1.237 +/* x86_op_t : an X86 instruction operand */ 1.238 +typedef struct { 1.239 + enum x86_op_type type; /* operand type */ 1.240 + enum x86_op_datatype datatype; /* operand size */ 1.241 + enum x86_op_access access; /* operand access [RWX] */ 1.242 + enum x86_op_flags flags; /* misc flags */ 1.243 + union { 1.244 + /* sizeof will have to work on these union members! 
*/ 1.245 + /* immediate values */ 1.246 + char sbyte; 1.247 + short sword; 1.248 + int32_t sdword; 1.249 + qword_t sqword; 1.250 + unsigned char byte; 1.251 + unsigned short word; 1.252 + uint32_t dword; 1.253 + qword_t qword; 1.254 + float sreal; 1.255 + double dreal; 1.256 + /* misc large/non-native types */ 1.257 + unsigned char extreal[10]; 1.258 + unsigned char bcd[10]; 1.259 + qword_t dqword[2]; 1.260 + unsigned char simd[16]; 1.261 + unsigned char fpuenv[28]; 1.262 + /* offset from segment */ 1.263 + uint32_t offset; 1.264 + /* ID of CPU register */ 1.265 + x86_reg_t reg; 1.266 + /* offsets from current insn */ 1.267 + char relative_near; 1.268 + int32_t relative_far; 1.269 + /* segment:offset */ 1.270 + x86_absolute_t absolute; 1.271 + /* effective address [expression] */ 1.272 + x86_ea_t expression; 1.273 + } data; 1.274 + /* this is needed to make formatting operands more sane */ 1.275 + void * insn; /* pointer to x86_insn_t owning operand */ 1.276 +} x86_op_t; 1.277 + 1.278 +/* Linked list of x86_op_t; provided for manual traversal of the operand 1.279 + * list in an insn. Users wishing to add operands to this list, e.g. 
to add 1.280 + * implicit operands, should use x86_operand_new in x86_operand_list.h */ 1.281 +typedef struct x86_operand_list { 1.282 + x86_op_t op; 1.283 + struct x86_operand_list *next; 1.284 +} x86_oplist_t; 1.285 + 1.286 +enum x86_insn_group { 1.287 + insn_none = 0, /* invalid instruction */ 1.288 + insn_controlflow = 1, 1.289 + insn_arithmetic = 2, 1.290 + insn_logic = 3, 1.291 + insn_stack = 4, 1.292 + insn_comparison = 5, 1.293 + insn_move = 6, 1.294 + insn_string = 7, 1.295 + insn_bit_manip = 8, 1.296 + insn_flag_manip = 9, 1.297 + insn_fpu = 10, 1.298 + insn_interrupt = 13, 1.299 + insn_system = 14, 1.300 + insn_other = 15 1.301 +}; 1.302 + 1.303 +enum x86_insn_type { 1.304 + insn_invalid = 0, /* invalid instruction */ 1.305 + /* insn_controlflow */ 1.306 + insn_jmp = 0x1001, 1.307 + insn_jcc = 0x1002, 1.308 + insn_call = 0x1003, 1.309 + insn_callcc = 0x1004, 1.310 + insn_return = 0x1005, 1.311 + /* insn_arithmetic */ 1.312 + insn_add = 0x2001, 1.313 + insn_sub = 0x2002, 1.314 + insn_mul = 0x2003, 1.315 + insn_div = 0x2004, 1.316 + insn_inc = 0x2005, 1.317 + insn_dec = 0x2006, 1.318 + insn_shl = 0x2007, 1.319 + insn_shr = 0x2008, 1.320 + insn_rol = 0x2009, 1.321 + insn_ror = 0x200A, 1.322 + /* insn_logic */ 1.323 + insn_and = 0x3001, 1.324 + insn_or = 0x3002, 1.325 + insn_xor = 0x3003, 1.326 + insn_not = 0x3004, 1.327 + insn_neg = 0x3005, 1.328 + /* insn_stack */ 1.329 + insn_push = 0x4001, 1.330 + insn_pop = 0x4002, 1.331 + insn_pushregs = 0x4003, 1.332 + insn_popregs = 0x4004, 1.333 + insn_pushflags = 0x4005, 1.334 + insn_popflags = 0x4006, 1.335 + insn_enter = 0x4007, 1.336 + insn_leave = 0x4008, 1.337 + /* insn_comparison */ 1.338 + insn_test = 0x5001, 1.339 + insn_cmp = 0x5002, 1.340 + /* insn_move */ 1.341 + insn_mov = 0x6001, /* move */ 1.342 + insn_movcc = 0x6002, /* conditional move */ 1.343 + insn_xchg = 0x6003, /* exchange */ 1.344 + insn_xchgcc = 0x6004, /* conditional exchange */ 1.345 + /* insn_string */ 1.346 + insn_strcmp = 0x7001, 1.347 + 
insn_strload = 0x7002, 1.348 + insn_strmov = 0x7003, 1.349 + insn_strstore = 0x7004, 1.350 + insn_translate = 0x7005, /* xlat */ 1.351 + /* insn_bit_manip */ 1.352 + insn_bittest = 0x8001, 1.353 + insn_bitset = 0x8002, 1.354 + insn_bitclear = 0x8003, 1.355 + /* insn_flag_manip */ 1.356 + insn_clear_carry = 0x9001, 1.357 + insn_clear_zero = 0x9002, 1.358 + insn_clear_oflow = 0x9003, 1.359 + insn_clear_dir = 0x9004, 1.360 + insn_clear_sign = 0x9005, 1.361 + insn_clear_parity = 0x9006, 1.362 + insn_set_carry = 0x9007, 1.363 + insn_set_zero = 0x9008, 1.364 + insn_set_oflow = 0x9009, 1.365 + insn_set_dir = 0x900A, 1.366 + insn_set_sign = 0x900B, 1.367 + insn_set_parity = 0x900C, 1.368 + insn_tog_carry = 0x9010, 1.369 + insn_tog_zero = 0x9020, 1.370 + insn_tog_oflow = 0x9030, 1.371 + insn_tog_dir = 0x9040, 1.372 + insn_tog_sign = 0x9050, 1.373 + insn_tog_parity = 0x9060, 1.374 + /* insn_fpu */ 1.375 + insn_fmov = 0xA001, 1.376 + insn_fmovcc = 0xA002, 1.377 + insn_fneg = 0xA003, 1.378 + insn_fabs = 0xA004, 1.379 + insn_fadd = 0xA005, 1.380 + insn_fsub = 0xA006, 1.381 + insn_fmul = 0xA007, 1.382 + insn_fdiv = 0xA008, 1.383 + insn_fsqrt = 0xA009, 1.384 + insn_fcmp = 0xA00A, 1.385 + insn_fcos = 0xA00C, 1.386 + insn_fldpi = 0xA00D, 1.387 + insn_fldz = 0xA00E, 1.388 + insn_ftan = 0xA00F, 1.389 + insn_fsine = 0xA010, 1.390 + insn_fsys = 0xA020, 1.391 + /* insn_interrupt */ 1.392 + insn_int = 0xD001, 1.393 + insn_intcc = 0xD002, /* not present in x86 ISA */ 1.394 + insn_iret = 0xD003, 1.395 + insn_bound = 0xD004, 1.396 + insn_debug = 0xD005, 1.397 + insn_trace = 0xD006, 1.398 + insn_invalid_op = 0xD007, 1.399 + insn_oflow = 0xD008, 1.400 + /* insn_system */ 1.401 + insn_halt = 0xE001, 1.402 + insn_in = 0xE002, /* input from port/bus */ 1.403 + insn_out = 0xE003, /* output to port/bus */ 1.404 + insn_cpuid = 0xE004, 1.405 + /* insn_other */ 1.406 + insn_nop = 0xF001, 1.407 + insn_bcdconv = 0xF002, /* convert to or from BCD */ 1.408 + insn_szconv = 0xF003 /* change size of operand 
*/ 1.409 +}; 1.410 + 1.411 +/* These flags specify special characteristics of the instruction, such as 1.412 + * whether the inatruction is privileged or whether it serializes the 1.413 + * pipeline. 1.414 + * NOTE : These may not be accurate for all instructions; updates to the 1.415 + * opcode tables have not been completed. */ 1.416 +enum x86_insn_note { 1.417 + insn_note_ring0 = 1, /* Only available in ring 0 */ 1.418 + insn_note_smm = 2, /* "" in System Management Mode */ 1.419 + insn_note_serial = 4, /* Serializing instruction */ 1.420 + insn_note_nonswap = 8, /* Does not swap arguments in att-style formatting */ 1.421 + insn_note_nosuffix = 16, /* Does not have size suffix in att-style formatting */ 1.422 +}; 1.423 + 1.424 +/* This specifies what effects the instruction has on the %eflags register */ 1.425 +enum x86_flag_status { 1.426 + insn_carry_set = 0x1, /* CF */ 1.427 + insn_zero_set = 0x2, /* ZF */ 1.428 + insn_oflow_set = 0x4, /* OF */ 1.429 + insn_dir_set = 0x8, /* DF */ 1.430 + insn_sign_set = 0x10, /* SF */ 1.431 + insn_parity_set = 0x20, /* PF */ 1.432 + insn_carry_or_zero_set = 0x40, 1.433 + insn_zero_set_or_sign_ne_oflow = 0x80, 1.434 + insn_carry_clear = 0x100, 1.435 + insn_zero_clear = 0x200, 1.436 + insn_oflow_clear = 0x400, 1.437 + insn_dir_clear = 0x800, 1.438 + insn_sign_clear = 0x1000, 1.439 + insn_parity_clear = 0x2000, 1.440 + insn_sign_eq_oflow = 0x4000, 1.441 + insn_sign_ne_oflow = 0x8000 1.442 +}; 1.443 + 1.444 +/* The CPU model in which the insturction first appeared; this can be used 1.445 + * to mask out instructions appearing in earlier or later models or to 1.446 + * check the portability of a binary. 1.447 + * NOTE : These may not be accurate for all instructions; updates to the 1.448 + * opcode tables have not been completed. 
*/ 1.449 +enum x86_insn_cpu { 1.450 + cpu_8086 = 1, /* Intel */ 1.451 + cpu_80286 = 2, 1.452 + cpu_80386 = 3, 1.453 + cpu_80387 = 4, 1.454 + cpu_80486 = 5, 1.455 + cpu_pentium = 6, 1.456 + cpu_pentiumpro = 7, 1.457 + cpu_pentium2 = 8, 1.458 + cpu_pentium3 = 9, 1.459 + cpu_pentium4 = 10, 1.460 + cpu_k6 = 16, /* AMD */ 1.461 + cpu_k7 = 32, 1.462 + cpu_athlon = 48 1.463 +}; 1.464 + 1.465 +/* CPU ISA subsets: These are derived from the Instruction Groups in 1.466 + * Intel Vol 1 Chapter 5; they represent subsets of the IA32 ISA but 1.467 + * do not reflect the 'type' of the instruction in the same way that 1.468 + * x86_insn_group does. In short, these are AMD/Intel's somewhat useless 1.469 + * designations. 1.470 + * NOTE : These may not be accurate for all instructions; updates to the 1.471 + * opcode tables have not been completed. */ 1.472 +enum x86_insn_isa { 1.473 + isa_gp = 1, /* general purpose */ 1.474 + isa_fp = 2, /* floating point */ 1.475 + isa_fpumgt = 3, /* FPU/SIMD management */ 1.476 + isa_mmx = 4, /* Intel MMX */ 1.477 + isa_sse1 = 5, /* Intel SSE SIMD */ 1.478 + isa_sse2 = 6, /* Intel SSE2 SIMD */ 1.479 + isa_sse3 = 7, /* Intel SSE3 SIMD */ 1.480 + isa_3dnow = 8, /* AMD 3DNow! SIMD */ 1.481 + isa_sys = 9 /* system instructions */ 1.482 +}; 1.483 + 1.484 +enum x86_insn_prefix { 1.485 + insn_no_prefix = 0, 1.486 + insn_rep_zero = 1, /* REPZ and REPE */ 1.487 + insn_rep_notzero = 2, /* REPNZ and REPNZ */ 1.488 + insn_lock = 4 /* LOCK: */ 1.489 +}; 1.490 + 1.491 +/* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */ 1.492 +/* x86_insn_t : an X86 instruction */ 1.493 +typedef struct { 1.494 + /* information about the instruction */ 1.495 + uint32_t addr; /* load address */ 1.496 + uint32_t offset; /* offset into file/buffer */ 1.497 + enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */ 1.498 + enum x86_insn_type type; /* type, e.g. INS_BRANCH */ 1.499 + enum x86_insn_note note; /* note, e.g. 
RING0 */ 1.500 + unsigned char bytes[MAX_INSN_SIZE]; 1.501 + unsigned char size; /* size of insn in bytes */ 1.502 + /* 16/32-bit mode settings */ 1.503 + unsigned char addr_size; /* default address size : 2 or 4 */ 1.504 + unsigned char op_size; /* default operand size : 2 or 4 */ 1.505 + /* CPU/instruction set */ 1.506 + enum x86_insn_cpu cpu; 1.507 + enum x86_insn_isa isa; 1.508 + /* flags */ 1.509 + enum x86_flag_status flags_set; /* flags set or tested by insn */ 1.510 + enum x86_flag_status flags_tested; 1.511 + /* stack */ 1.512 + unsigned char stack_mod; /* 0 or 1 : is the stack modified? */ 1.513 + int32_t stack_mod_val; /* val stack is modified by if known */ 1.514 + 1.515 + /* the instruction proper */ 1.516 + enum x86_insn_prefix prefix; /* prefixes ORed together */ 1.517 + char prefix_string[MAX_PREFIX_STR]; /* prefixes [might be truncated] */ 1.518 + char mnemonic[MAX_MNEM_STR]; 1.519 + x86_oplist_t *operands; /* list of explicit/implicit operands */ 1.520 + size_t operand_count; /* total number of operands */ 1.521 + size_t explicit_count; /* number of explicit operands */ 1.522 + /* convenience fields for user */ 1.523 + void *block; /* code block containing this insn */ 1.524 + void *function; /* function containing this insn */ 1.525 + int tag; /* tag the insn as seen/processed */ 1.526 +} x86_insn_t; 1.527 + 1.528 + 1.529 +/* returns 0 if an instruction is invalid, 1 if valid */ 1.530 +int x86_insn_is_valid( x86_insn_t *insn ); 1.531 + 1.532 +/* DISASSEMBLY ROUTINES 1.533 + * Canonical order of arguments is 1.534 + * (buf, buf_len, buf_rva, offset, len, insn, func, arg, resolve_func) 1.535 + * ...but of course all of these are not used at the same time. 1.536 + */ 1.537 + 1.538 + 1.539 +/* Function prototype for caller-supplied callback routine 1.540 + * These callbacks are intended to process 'insn' further, e.g. 
by 1.541 + * adding it to a linked list, database, etc */ 1.542 +typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void * arg ); 1.543 + 1.544 +/* Function prototype for caller-supplied address resolver. 1.545 + * This routine is used to determine the rva to disassemble next, given 1.546 + * the 'dest' operand of a jump/call. This allows the caller to resolve 1.547 + * jump/call targets stored in a register or on the stack, and also allows 1.548 + * the caller to prevent endless loops by checking if an address has 1.549 + * already been disassembled. If an address cannot be resolved from the 1.550 + * operand, or if the address has already been disassembled, this routine 1.551 + * should return -1; in all other cases the RVA to be disassembled next 1.552 + * should be returned. */ 1.553 +typedef int32_t (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t * current_insn, 1.554 + void *arg ); 1.555 + 1.556 + 1.557 +/* x86_disasm: Disassemble a single instruction from a buffer of bytes. 1.558 + * Returns size of instruction in bytes. 1.559 + * Caller is responsible for calling x86_oplist_free() on 1.560 + * a reused "insn" to avoid leaking memory when calling this 1.561 + * function repeatedly. 1.562 + * buf : Buffer of bytes to disassemble 1.563 + * buf_len : Length of the buffer 1.564 + * buf_rva : Load address of the start of the buffer 1.565 + * offset : Offset in buffer to disassemble 1.566 + * insn : Structure to fill with disassembled instruction 1.567 + */ 1.568 +unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len, 1.569 + uint32_t buf_rva, unsigned int offset, 1.570 + x86_insn_t * insn ); 1.571 + 1.572 +/* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer, 1.573 + * invoking a callback function each time an instruction 1.574 + * is successfully disassembled. The 'range' refers to the 1.575 + * bytes between 'offset' and 'offset + len' in the buffer; 1.576 + * 'len' is assumed to be less than the length of the buffer. 
1.577 + * Returns number of instructions processed. 1.578 + * buf : Buffer of bytes to disassemble (e.g. .text section) 1.579 + * buf_rva : Load address of buffer (e.g. ELF Virtual Address) 1.580 + * offset : Offset in buffer to start disassembly at 1.581 + * len : Number of bytes to disassemble 1.582 + * func : Callback function to invoke (may be NULL) 1.583 + * arg : Arbitrary data to pass to callback (may be NULL) 1.584 + */ 1.585 +unsigned int x86_disasm_range( unsigned char *buf, uint32_t buf_rva, 1.586 + unsigned int offset, unsigned int len, 1.587 + DISASM_CALLBACK func, void *arg ); 1.588 + 1.589 +/* x86_disasm_forward: Flow-of-execution disassembly of the bytes in a buffer, 1.590 + * invoking a callback function each time an instruction 1.591 + * is successfully disassembled. 1.592 + * buf : Buffer to disassemble (e.g. .text section) 1.593 + * buf_len : Number of bytes in buffer 1.594 + * buf_rva : Load address of buffer (e.g. ELF Virtual Address) 1.595 + * offset : Offset in buffer to start disassembly at (e.g. entry point) 1.596 + * func : Callback function to invoke (may be NULL) 1.597 + * arg : Arbitrary data to pass to callback (may be NULL) 1.598 + * resolver: Caller-supplied address resolver. If no resolver is 1.599 + * supplied, a default internal one is used -- however the 1.600 + * internal resolver does NOT catch loops and could end up 1.601 + * disassembling forever.. 1.602 + * r_arg : Arbitrary data to pass to resolver (may be NULL) 1.603 + */ 1.604 +unsigned int x86_disasm_forward( unsigned char *buf, unsigned int buf_len, 1.605 + uint32_t buf_rva, unsigned int offset, 1.606 + DISASM_CALLBACK func, void *arg, 1.607 + DISASM_RESOLVER resolver, void *r_arg ); 1.608 + 1.609 +/* Instruction operands: these are stored as a list of explicit and 1.610 + * implicit operands. 
It is recommended that the 'foreach' routines 1.611 + * be used to when examining operands for purposes of data flow analysis */ 1.612 + 1.613 +/* Operand FOREACH callback: 'arg' is an abritrary parameter passed to the 1.614 + * foreach routine, 'insn' is the x86_insn_t whose operands are being 1.615 + * iterated over, and 'op' is the current x86_op_t */ 1.616 +typedef void (*x86_operand_fn)(x86_op_t *op, x86_insn_t *insn, void *arg); 1.617 + 1.618 +/* FOREACH types: these are used to limit the foreach results to 1.619 + * operands which match a certain "type" (implicit or explicit) 1.620 + * or which are accessed in certain ways (e.g. read or write). Note 1.621 + * that this operates on the operand list of single instruction, so 1.622 + * specifying the 'real' operand type (register, memory, etc) is not 1.623 + * useful. Note also that by definition Execute Access implies Read 1.624 + * Access and implies Not Write Access. 1.625 + * The "type" (implicit or explicit) and the access method can 1.626 + * be ORed together, e.g. op_wo | op_explicit */ 1.627 +enum x86_op_foreach_type { 1.628 + op_any = 0, /* ALL operands (explicit, implicit, rwx) */ 1.629 + op_dest = 1, /* operands with Write access */ 1.630 + op_src = 2, /* operands with Read access */ 1.631 + op_ro = 3, /* operands with Read but not Write access */ 1.632 + op_wo = 4, /* operands with Write but not Read access */ 1.633 + op_xo = 5, /* operands with Execute access */ 1.634 + op_rw = 6, /* operands with Read AND Write access */ 1.635 + op_implicit = 0x10, /* operands that are implied by the opcode */ 1.636 + op_explicit = 0x20 /* operands that are not side-effects */ 1.637 +}; 1.638 + 1.639 + 1.640 +/* free the operand list associated with an instruction -- useful for 1.641 + * preventing memory leaks when free()ing an x86_insn_t */ 1.642 +void x86_oplist_free( x86_insn_t *insn ); 1.643 + 1.644 +/* Operand foreach: invokes 'func' with 'insn' and 'arg' as arguments. 
The 1.645 + * 'type' parameter is used to select only operands matching specific 1.646 + * criteria. */ 1.647 +int x86_operand_foreach( x86_insn_t *insn, x86_operand_fn func, void *arg, 1.648 + enum x86_op_foreach_type type); 1.649 + 1.650 +/* convenience routine: returns count of operands matching 'type' */ 1.651 +size_t x86_operand_count( x86_insn_t *insn, enum x86_op_foreach_type type ); 1.652 + 1.653 +/* accessor functions for the operands */ 1.654 +x86_op_t * x86_operand_1st( x86_insn_t *insn ); 1.655 +x86_op_t * x86_operand_2nd( x86_insn_t *insn ); 1.656 +x86_op_t * x86_operand_3rd( x86_insn_t *insn ); 1.657 + 1.658 +/* these allow libdisasm 2.0 accessor functions to still be used */ 1.659 +#define x86_get_dest_operand( insn ) x86_operand_1st( insn ) 1.660 +#define x86_get_src_operand( insn ) x86_operand_2nd( insn ) 1.661 +#define x86_get_imm_operand( insn ) x86_operand_3rd( insn ) 1.662 + 1.663 +/* get size of operand data in bytes */ 1.664 +unsigned int x86_operand_size( x86_op_t *op ); 1.665 + 1.666 +/* Operand Convenience Routines: the following three routines are common 1.667 + * operations on operands, intended to ease the burden of the programmer. */ 1.668 + 1.669 +/* Get Address: return the value of an offset operand, or the offset of 1.670 + * a segment:offset absolute address */ 1.671 +uint32_t x86_get_address( x86_insn_t *insn ); 1.672 + 1.673 +/* Get Relative Offset: return as a sign-extended int32_t the near or far 1.674 + * relative offset operand, or 0 if there is none. There can be only one 1.675 + * relaive offset operand in an instruction. */ 1.676 +int32_t x86_get_rel_offset( x86_insn_t *insn ); 1.677 + 1.678 +/* Get Branch Target: return the x86_op_t containing the target of 1.679 + * a jump or call operand, or NULL if there is no branch target. 1.680 + * Internally, a 'branch target' is defined as any operand with 1.681 + * Execute Access set. There can be only one branch target per instruction. 
*/ 1.682 +x86_op_t * x86_get_branch_target( x86_insn_t *insn ); 1.683 + 1.684 +/* Get Immediate: return the x86_op_t containing the immediate operand 1.685 + * for this instruction, or NULL if there is no immediate operand. There 1.686 + * can be only one immediate operand per instruction */ 1.687 +x86_op_t * x86_get_imm( x86_insn_t *insn ); 1.688 + 1.689 +/* Get Raw Immediate Data: returns a pointer to the immediate data encoded 1.690 + * in the instruction. This is useful for large data types [>32 bits] currently 1.691 + * not supported by libdisasm, or for determining if the disassembler 1.692 + * screwed up the conversion of the immediate data. Note that 'imm' in this 1.693 + * context refers to immediate data encoded at the end of an instruction as 1.694 + * detailed in the Intel Manual Vol II Chapter 2; it does not refer to the 1.695 + * 'op_imm' operand (the third operand in instructions like 'mul' */ 1.696 +unsigned char * x86_get_raw_imm( x86_insn_t *insn ); 1.697 + 1.698 + 1.699 +/* More accessor fuctions, this time for user-defined info... */ 1.700 +/* set the address (usually RVA) of the insn */ 1.701 +void x86_set_insn_addr( x86_insn_t *insn, uint32_t addr ); 1.702 + 1.703 +/* set the offset (usually offset into file) of the insn */ 1.704 +void x86_set_insn_offset( x86_insn_t *insn, unsigned int offset ); 1.705 + 1.706 +/* set a pointer to the function owning the instruction. The 1.707 + * type of 'func' is user-defined; libdisasm does not use the func field. */ 1.708 +void x86_set_insn_function( x86_insn_t *insn, void * func ); 1.709 + 1.710 +/* set a pointer to the block of code owning the instruction. The 1.711 + * type of 'block' is user-defined; libdisasm does not use the block field. */ 1.712 +void x86_set_insn_block( x86_insn_t *insn, void * block ); 1.713 + 1.714 +/* instruction tagging: these routines allow the programmer to mark 1.715 + * instructions as "seen" in a DFS, for example. 
libdisasm does not use 1.716 + * the tag field.*/ 1.717 +/* set insn->tag to 1 */ 1.718 +void x86_tag_insn( x86_insn_t *insn ); 1.719 +/* set insn->tag to 0 */ 1.720 +void x86_untag_insn( x86_insn_t *insn ); 1.721 +/* return insn->tag */ 1.722 +int x86_insn_is_tagged( x86_insn_t *insn ); 1.723 + 1.724 + 1.725 +/* Disassembly formats: 1.726 + * AT&T is standard AS/GAS-style: "mnemonic\tsrc, dest, imm" 1.727 + * Intel is standard MASM/NASM/TASM: "mnemonic\tdest,src, imm" 1.728 + * Native is tab-delimited: "RVA\tbytes\tmnemonic\tdest\tsrc\timm" 1.729 + * XML is your typical <insn> ... </insn> 1.730 + * Raw is addr|offset|size|bytes|prefix... see libdisasm_formats.7 1.731 + */ 1.732 +enum x86_asm_format { 1.733 + unknown_syntax = 0, /* never use! */ 1.734 + native_syntax, /* header: 35 bytes */ 1.735 + intel_syntax, /* header: 23 bytes */ 1.736 + att_syntax, /* header: 23 bytes */ 1.737 + xml_syntax, /* header: 679 bytes */ 1.738 + raw_syntax /* header: 172 bytes */ 1.739 +}; 1.740 + 1.741 +/* format (sprintf) an operand into 'buf' using specified syntax */ 1.742 +int x86_format_operand(x86_op_t *op, char *buf, int len, 1.743 + enum x86_asm_format format); 1.744 + 1.745 +/* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */ 1.746 +int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len, 1.747 + enum x86_asm_format format); 1.748 + 1.749 +/* format (sprintf) an instruction into 'buf' using specified syntax; 1.750 + * this includes formatting all operands */ 1.751 +int x86_format_insn(x86_insn_t *insn, char *buf, int len, enum x86_asm_format); 1.752 + 1.753 +/* fill 'buf' with a description of the format's syntax */ 1.754 +int x86_format_header( char *buf, int len, enum x86_asm_format format); 1.755 + 1.756 +/* Endianness of an x86 CPU : 0 is big, 1 is little; always returns 1 */ 1.757 +unsigned int x86_endian(void); 1.758 + 1.759 +/* Default address and operand size in bytes */ 1.760 +unsigned int x86_addr_size(void); 1.761 +unsigned int 
x86_op_size(void); 1.762 + 1.763 +/* Size of a machine word in bytes */ 1.764 +unsigned int x86_word_size(void); 1.765 + 1.766 +/* maximum size of a code instruction */ 1.767 +#define x86_max_inst_size(x) x86_max_insn_size(x) 1.768 +unsigned int x86_max_insn_size(void); 1.769 + 1.770 +/* register IDs of Stack, Frame, Instruction pointer and Flags register */ 1.771 +unsigned int x86_sp_reg(void); 1.772 +unsigned int x86_fp_reg(void); 1.773 +unsigned int x86_ip_reg(void); 1.774 +unsigned int x86_flag_reg(void); 1.775 + 1.776 +/* fill 'reg' struct with details of register 'id' */ 1.777 +void x86_reg_from_id( unsigned int id, x86_reg_t * reg ); 1.778 + 1.779 +/* convenience macro demonstrating how to get an aliased register; proto is 1.780 + * void x86_get_aliased_reg( x86_reg_t *alias_reg, x86_reg_t *output_reg ) 1.781 + * where 'alias_reg' is a reg operand and 'output_reg' is filled with the 1.782 + * register that the operand is an alias for */ 1.783 +#define x86_get_aliased_reg( alias_reg, output_reg ) \ 1.784 + x86_reg_from_id( alias_reg->alias, output_reg ) 1.785 + 1.786 + 1.787 +/* ================================== Invariant Instruction Representation */ 1.788 +/* Invariant instructions are used for generating binary signatures; 1.789 + * the instruction is modified so that all variant bytes in an instruction 1.790 + * are replaced with a wildcard byte. 1.791 + * 1.792 + * A 'variant byte' is one that is expected to be modified by either the 1.793 + * static or the dynamic linker: for example, an address encoded in an 1.794 + * instruction. 1.795 + * 1.796 + * By comparing the invariant representation of one instruction [or of a 1.797 + * sequence of instructions] with the invariant representation of another, 1.798 + * one can determine whether the two invariant representations are from the same 1.799 + * relocatable object [.o] file.
Thus one can use binary signatures [which 1.800 + * are just sequences of invariant instruction representations] to look for 1.801 + * library routines which have been statically-linked into a binary. 1.802 + * 1.803 + * The invariant routines are faster and smaller than the disassembly 1.804 + * routines; they can be used to determine the size of an instruction 1.805 + * without all of the overhead of a full instruction disassembly. 1.806 + */ 1.807 + 1.808 +/* This byte is used to replace variant bytes */ 1.809 +#define X86_WILDCARD_BYTE 0xF4 1.810 + 1.811 +typedef struct { 1.812 + enum x86_op_type type; /* operand type */ 1.813 + enum x86_op_datatype datatype; /* operand size */ 1.814 + enum x86_op_access access; /* operand access [RWX] */ 1.815 + enum x86_op_flags flags; /* misc flags */ 1.816 +} x86_invariant_op_t; 1.817 + 1.818 +typedef struct { 1.819 + unsigned char bytes[64]; /* invariant representation */ 1.820 + unsigned int size; /* number of bytes in insn */ 1.821 + enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */ 1.822 + enum x86_insn_type type; /* type, e.g. INS_BRANCH */ 1.823 + x86_invariant_op_t operands[3]; /* operands: dest, src, imm */ 1.824 +} x86_invariant_t; 1.825 + 1.826 + 1.827 +/* return a version of the instruction with the variant bytes masked out */ 1.828 +size_t x86_invariant_disasm( unsigned char *buf, int buf_len, 1.829 + x86_invariant_t *inv ); 1.830 +/* return the size in bytes of the instruction pointed to by 'buf'; 1.831 + * this uses x86_invariant_disasm since it is faster than x86_disasm */ 1.832 +size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len ); 1.833 + 1.834 +#ifdef __cplusplus 1.835 +} 1.836 +#endif 1.837 + 1.838 + 1.839 +#endif