toolkit/crashreporter/google-breakpad/src/third_party/libdisasm/libdis.h

Sat, 03 Jan 2015 20:18:00 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Sat, 03 Jan 2015 20:18:00 +0100
branch
TOR_BUG_3246
changeset 7
129ffea94266
permissions
-rw-r--r--

Conditionally enable double key logic according to:
private browsing mode or privacy.thirdparty.isolate preference and
implement in GetCookieStringCommon and FindCookie where it counts...
With some reservations of how to convince FindCookie users to test
condition and pass a nullptr when disabling double key logic.

michael@0 1 #ifndef LIBDISASM_H
michael@0 2 #define LIBDISASM_H
michael@0 3
michael@0 4 #ifdef WIN32
michael@0 5 #include <windows.h>
michael@0 6 #endif
michael@0 7
michael@0 8 #include <stdint.h>
michael@0 9
michael@0 10 /* 'NEW" types
michael@0 11 * __________________________________________________________________________*/
michael@0 12 #ifndef LIBDISASM_QWORD_H /* do not interfere with qword.h */
michael@0 13 #define LIBDISASM_QWORD_H
michael@0 14 #ifdef _MSC_VER
michael@0 15 typedef __int64 qword_t;
michael@0 16 #else
michael@0 17 typedef int64_t qword_t;
michael@0 18 #endif
michael@0 19 #endif
michael@0 20
michael@0 21 #include <sys/types.h>
michael@0 22
michael@0 23 #ifdef __cplusplus
michael@0 24 extern "C" {
michael@0 25 #endif
michael@0 26
michael@0 27 /* 'NEW" x86 API
michael@0 28 * __________________________________________________________________________*/
michael@0 29
michael@0 30
michael@0 31 /* ========================================= Error Reporting */
michael@0 32 /* REPORT CODES
michael@0 33 * These are passed to a reporter function passed at initialization.
michael@0 34 * Each code determines the type of the argument passed to the reporter;
michael@0 35 * this allows the report to recover from errors, or just log them.
michael@0 36 */
michael@0 37 enum x86_report_codes {
michael@0 38 report_disasm_bounds, /* RVA OUT OF BOUNDS : The disassembler could
michael@0 39 not disassemble the supplied RVA as it is
michael@0 40 out of the range of the buffer. The
michael@0 41 application should store the address and
michael@0 42 attempt to determine what section of the
michael@0 43 binary it is in, then disassemble the
michael@0 44 address from the bytes in that section.
michael@0 45 data: uint32_t rva */
michael@0 46 report_insn_bounds, /* INSTRUCTION OUT OF BOUNDS: The disassembler
michael@0 47 could not disassemble the instruction as
michael@0 48 the instruction would require bytes beyond
michael@0 49 the end of the current buffer. This usually
michael@0 50 indicated garbage bytes at the end of a
michael@0 51 buffer, or an incorrectly-sized buffer.
michael@0 52 data: uint32_t rva */
michael@0 53 report_invalid_insn, /* INVALID INSTRUCTION: The disassembler could
michael@0 54 not disassemble the instruction as it has an
michael@0 55 invalid combination of opcodes and operands.
michael@0 56 This will stop automated disassembly; the
michael@0 57 application can restart the disassembly
michael@0 58 after the invalid instruction.
michael@0 59 data: uint32_t rva */
michael@0 60 report_unknown
michael@0 61 };
michael@0 62
michael@0 63 /* 'arg' is optional arbitrary data provided by the code passing the
michael@0 64 * callback -- for example, it could be 'this' or 'self' in OOP code.
michael@0 65 * 'code' is provided by libdisasm, it is one of the above
michael@0 66 * 'data' is provided by libdisasm and is context-specific, per the enums */
michael@0 67 typedef void (*DISASM_REPORTER)( enum x86_report_codes code,
michael@0 68 void *data, void *arg );
michael@0 69
michael@0 70
michael@0 71 /* x86_report_error : Call the register reporter to report an error */
michael@0 72 void x86_report_error( enum x86_report_codes code, void *data );
michael@0 73
michael@0 74 /* ========================================= Libdisasm Management Routines */
michael@0 75 enum x86_options { /* these can be ORed together */
michael@0 76 opt_none= 0,
michael@0 77 opt_ignore_nulls=1, /* ignore sequences of > 4 NULL bytes */
michael@0 78 opt_16_bit=2, /* 16-bit/DOS disassembly */
michael@0 79 opt_att_mnemonics=4, /* use AT&T syntax names for alternate opcode mnemonics */
michael@0 80 };
michael@0 81
michael@0 82 /* management routines */
michael@0 83 /* 'arg' is caller-specific data which is passed as the first argument
michael@0 84 * to the reporter callback routine */
michael@0 85 int x86_init( enum x86_options options, DISASM_REPORTER reporter, void *arg);
michael@0 86 void x86_set_reporter( DISASM_REPORTER reporter, void *arg);
michael@0 87 void x86_set_options( enum x86_options options );
michael@0 88 enum x86_options x86_get_options( void );
michael@0 89 int x86_cleanup(void);
michael@0 90
michael@0 91
michael@0 92 /* ========================================= Instruction Representation */
michael@0 93 /* these defines are only intended for use in the array decl's */
michael@0 94 #define MAX_REGNAME 8
michael@0 95
michael@0 96 #define MAX_PREFIX_STR 32
michael@0 97 #define MAX_MNEM_STR 16
michael@0 98 #define MAX_INSN_SIZE 20 /* same as in i386.h */
michael@0 99 #define MAX_OP_STRING 32 /* max possible operand size in string form */
michael@0 100 #define MAX_OP_RAW_STRING 64 /* max possible operand size in raw form */
michael@0 101 #define MAX_OP_XML_STRING 256 /* max possible operand size in xml form */
michael@0 102 #define MAX_NUM_OPERANDS 8 /* max # implicit and explicit operands */
michael@0 103 /* in these, the '2 *' is arbitrary: the max # of operands should require
michael@0 104 * more space than the rest of the insn */
michael@0 105 #define MAX_INSN_STRING 512 /* 2 * 8 * MAX_OP_STRING */
michael@0 106 #define MAX_INSN_RAW_STRING 1024 /* 2 * 8 * MAX_OP_RAW_STRING */
michael@0 107 #define MAX_INSN_XML_STRING 4096 /* 2 * 8 * MAX_OP_XML_STRING */
michael@0 108
michael@0 109 enum x86_reg_type { /* NOTE: these may be ORed together */
michael@0 110 reg_gen = 0x00001, /* general purpose */
michael@0 111 reg_in = 0x00002, /* incoming args, ala RISC */
michael@0 112 reg_out = 0x00004, /* args to calls, ala RISC */
michael@0 113 reg_local = 0x00008, /* local vars, ala RISC */
michael@0 114 reg_fpu = 0x00010, /* FPU data register */
michael@0 115 reg_seg = 0x00020, /* segment register */
michael@0 116 reg_simd = 0x00040, /* SIMD/MMX reg */
michael@0 117 reg_sys = 0x00080, /* restricted/system register */
michael@0 118 reg_sp = 0x00100, /* stack pointer */
michael@0 119 reg_fp = 0x00200, /* frame pointer */
michael@0 120 reg_pc = 0x00400, /* program counter */
michael@0 121 reg_retaddr = 0x00800, /* return addr for func */
michael@0 122 reg_cond = 0x01000, /* condition code / flags */
michael@0 123 reg_zero = 0x02000, /* zero register, ala RISC */
michael@0 124 reg_ret = 0x04000, /* return value */
michael@0 125 reg_src = 0x10000, /* array/rep source */
michael@0 126 reg_dest = 0x20000, /* array/rep destination */
michael@0 127 reg_count = 0x40000 /* array/rep/loop counter */
michael@0 128 };
michael@0 129
michael@0 130 /* x86_reg_t : an X86 CPU register */
michael@0 131 typedef struct {
michael@0 132 char name[MAX_REGNAME];
michael@0 133 enum x86_reg_type type; /* what register is used for */
michael@0 134 unsigned int size; /* size of register in bytes */
michael@0 135 unsigned int id; /* register ID #, for quick compares */
michael@0 136 unsigned int alias; /* ID of reg this is an alias for */
michael@0 137 unsigned int shift; /* amount to shift aliased reg by */
michael@0 138 } x86_reg_t;
michael@0 139
michael@0 140 /* x86_ea_t : an X86 effective address (address expression) */
michael@0 141 typedef struct {
michael@0 142 unsigned int scale; /* scale factor */
michael@0 143 x86_reg_t index, base; /* index, base registers */
michael@0 144 int32_t disp; /* displacement */
michael@0 145 char disp_sign; /* is negative? 1/0 */
michael@0 146 char disp_size; /* 0, 1, 2, 4 */
michael@0 147 } x86_ea_t;
michael@0 148
michael@0 149 /* x86_absolute_t : an X86 segment:offset address (descriptor) */
michael@0 150 typedef struct {
michael@0 151 unsigned short segment; /* loaded directly into CS */
michael@0 152 union {
michael@0 153 unsigned short off16; /* loaded directly into IP */
michael@0 154 uint32_t off32; /* loaded directly into EIP */
michael@0 155 } offset;
michael@0 156 } x86_absolute_t;
michael@0 157
michael@0 158 enum x86_op_type { /* mutually exclusive */
michael@0 159 op_unused = 0, /* empty/unused operand: should never occur */
michael@0 160 op_register = 1, /* CPU register */
michael@0 161 op_immediate = 2, /* Immediate Value */
michael@0 162 op_relative_near = 3, /* Relative offset from IP */
michael@0 163 op_relative_far = 4, /* Relative offset from IP */
michael@0 164 op_absolute = 5, /* Absolute address (ptr16:32) */
michael@0 165 op_expression = 6, /* Address expression (scale/index/base/disp) */
michael@0 166 op_offset = 7, /* Offset from start of segment (m32) */
michael@0 167 op_unknown
michael@0 168 };
michael@0 169
michael@0 170 #define x86_optype_is_address( optype ) \
michael@0 171 ( optype == op_absolute || optype == op_offset )
michael@0 172 #define x86_optype_is_relative( optype ) \
michael@0 173 ( optype == op_relative_near || optype == op_relative_far )
michael@0 174 #define x86_optype_is_memory( optype ) \
michael@0 175 ( optype > op_immediate && optype < op_unknown )
michael@0 176
michael@0 177 enum x86_op_datatype { /* these use Intel's lame terminology */
michael@0 178 op_byte = 1, /* 1 byte integer */
michael@0 179 op_word = 2, /* 2 byte integer */
michael@0 180 op_dword = 3, /* 4 byte integer */
michael@0 181 op_qword = 4, /* 8 byte integer */
michael@0 182 op_dqword = 5, /* 16 byte integer */
michael@0 183 op_sreal = 6, /* 4 byte real (single real) */
michael@0 184 op_dreal = 7, /* 8 byte real (double real) */
michael@0 185 op_extreal = 8, /* 10 byte real (extended real) */
michael@0 186 op_bcd = 9, /* 10 byte binary-coded decimal */
michael@0 187 op_ssimd = 10, /* 16 byte : 4 packed single FP (SIMD, MMX) */
michael@0 188 op_dsimd = 11, /* 16 byte : 2 packed double FP (SIMD, MMX) */
michael@0 189 op_sssimd = 12, /* 4 byte : scalar single FP (SIMD, MMX) */
michael@0 190 op_sdsimd = 13, /* 8 byte : scalar double FP (SIMD, MMX) */
michael@0 191 op_descr32 = 14, /* 6 byte Intel descriptor 2:4 */
michael@0 192 op_descr16 = 15, /* 4 byte Intel descriptor 2:2 */
michael@0 193 op_pdescr32 = 16, /* 6 byte Intel pseudo-descriptor 32:16 */
michael@0 194 op_pdescr16 = 17, /* 6 byte Intel pseudo-descriptor 8:24:16 */
michael@0 195 op_bounds16 = 18, /* signed 16:16 lower:upper bounds */
michael@0 196 op_bounds32 = 19, /* signed 32:32 lower:upper bounds */
michael@0 197 op_fpuenv16 = 20, /* 14 byte FPU control/environment data */
michael@0 198 op_fpuenv32 = 21, /* 28 byte FPU control/environment data */
michael@0 199 op_fpustate16 = 22, /* 94 byte FPU state (env & reg stack) */
michael@0 200 op_fpustate32 = 23, /* 108 byte FPU state (env & reg stack) */
michael@0 201 op_fpregset = 24, /* 512 bytes: register set */
michael@0 202 op_fpreg = 25, /* FPU register */
michael@0 203 op_none = 0xFF, /* operand without a datatype (INVLPG) */
michael@0 204 };
michael@0 205
michael@0 206 enum x86_op_access { /* ORed together */
michael@0 207 op_read = 1,
michael@0 208 op_write = 2,
michael@0 209 op_execute = 4
michael@0 210 };
michael@0 211
michael@0 212 enum x86_op_flags { /* ORed together, but segs are mutually exclusive */
michael@0 213 op_signed = 1, /* signed integer */
michael@0 214 op_string = 2, /* possible string or array */
michael@0 215 op_constant = 4, /* symbolic constant */
michael@0 216 op_pointer = 8, /* operand points to a memory address */
michael@0 217 op_sysref = 0x010, /* operand is a syscall number */
michael@0 218 op_implied = 0x020, /* operand is implicit in the insn */
michael@0 219 op_hardcode = 0x40, /* operand is hardcoded in insn definition */
michael@0 220 /* NOTE: an 'implied' operand is one which can be considered a side
michael@0 221 * effect of the insn, e.g. %esp being modified by PUSH or POP. A
michael@0 222 * 'hard-coded' operand is one which is specified in the instruction
michael@0 223 * definition, e.g. %es:%edi in MOVSB or 1 in ROL Eb, 1. The difference
michael@0 224 * is that hard-coded operands are printed by disassemblers and are
michael@0 225 * required to re-assemble, while implicit operands are invisible. */
michael@0 226 op_es_seg = 0x100, /* ES segment override */
michael@0 227 op_cs_seg = 0x200, /* CS segment override */
michael@0 228 op_ss_seg = 0x300, /* SS segment override */
michael@0 229 op_ds_seg = 0x400, /* DS segment override */
michael@0 230 op_fs_seg = 0x500, /* FS segment override */
michael@0 231 op_gs_seg = 0x600 /* GS segment override */
michael@0 232 };
michael@0 233
michael@0 234 /* x86_op_t : an X86 instruction operand */
michael@0 235 typedef struct {
michael@0 236 enum x86_op_type type; /* operand type */
michael@0 237 enum x86_op_datatype datatype; /* operand size */
michael@0 238 enum x86_op_access access; /* operand access [RWX] */
michael@0 239 enum x86_op_flags flags; /* misc flags */
michael@0 240 union {
michael@0 241 /* sizeof will have to work on these union members! */
michael@0 242 /* immediate values */
michael@0 243 char sbyte;
michael@0 244 short sword;
michael@0 245 int32_t sdword;
michael@0 246 qword_t sqword;
michael@0 247 unsigned char byte;
michael@0 248 unsigned short word;
michael@0 249 uint32_t dword;
michael@0 250 qword_t qword;
michael@0 251 float sreal;
michael@0 252 double dreal;
michael@0 253 /* misc large/non-native types */
michael@0 254 unsigned char extreal[10];
michael@0 255 unsigned char bcd[10];
michael@0 256 qword_t dqword[2];
michael@0 257 unsigned char simd[16];
michael@0 258 unsigned char fpuenv[28];
michael@0 259 /* offset from segment */
michael@0 260 uint32_t offset;
michael@0 261 /* ID of CPU register */
michael@0 262 x86_reg_t reg;
michael@0 263 /* offsets from current insn */
michael@0 264 char relative_near;
michael@0 265 int32_t relative_far;
michael@0 266 /* segment:offset */
michael@0 267 x86_absolute_t absolute;
michael@0 268 /* effective address [expression] */
michael@0 269 x86_ea_t expression;
michael@0 270 } data;
michael@0 271 /* this is needed to make formatting operands more sane */
michael@0 272 void * insn; /* pointer to x86_insn_t owning operand */
michael@0 273 } x86_op_t;
michael@0 274
michael@0 275 /* Linked list of x86_op_t; provided for manual traversal of the operand
michael@0 276 * list in an insn. Users wishing to add operands to this list, e.g. to add
michael@0 277 * implicit operands, should use x86_operand_new in x86_operand_list.h */
michael@0 278 typedef struct x86_operand_list {
michael@0 279 x86_op_t op;
michael@0 280 struct x86_operand_list *next;
michael@0 281 } x86_oplist_t;
michael@0 282
michael@0 283 enum x86_insn_group {
michael@0 284 insn_none = 0, /* invalid instruction */
michael@0 285 insn_controlflow = 1,
michael@0 286 insn_arithmetic = 2,
michael@0 287 insn_logic = 3,
michael@0 288 insn_stack = 4,
michael@0 289 insn_comparison = 5,
michael@0 290 insn_move = 6,
michael@0 291 insn_string = 7,
michael@0 292 insn_bit_manip = 8,
michael@0 293 insn_flag_manip = 9,
michael@0 294 insn_fpu = 10,
michael@0 295 insn_interrupt = 13,
michael@0 296 insn_system = 14,
michael@0 297 insn_other = 15
michael@0 298 };
michael@0 299
michael@0 300 enum x86_insn_type {
michael@0 301 insn_invalid = 0, /* invalid instruction */
michael@0 302 /* insn_controlflow */
michael@0 303 insn_jmp = 0x1001,
michael@0 304 insn_jcc = 0x1002,
michael@0 305 insn_call = 0x1003,
michael@0 306 insn_callcc = 0x1004,
michael@0 307 insn_return = 0x1005,
michael@0 308 /* insn_arithmetic */
michael@0 309 insn_add = 0x2001,
michael@0 310 insn_sub = 0x2002,
michael@0 311 insn_mul = 0x2003,
michael@0 312 insn_div = 0x2004,
michael@0 313 insn_inc = 0x2005,
michael@0 314 insn_dec = 0x2006,
michael@0 315 insn_shl = 0x2007,
michael@0 316 insn_shr = 0x2008,
michael@0 317 insn_rol = 0x2009,
michael@0 318 insn_ror = 0x200A,
michael@0 319 /* insn_logic */
michael@0 320 insn_and = 0x3001,
michael@0 321 insn_or = 0x3002,
michael@0 322 insn_xor = 0x3003,
michael@0 323 insn_not = 0x3004,
michael@0 324 insn_neg = 0x3005,
michael@0 325 /* insn_stack */
michael@0 326 insn_push = 0x4001,
michael@0 327 insn_pop = 0x4002,
michael@0 328 insn_pushregs = 0x4003,
michael@0 329 insn_popregs = 0x4004,
michael@0 330 insn_pushflags = 0x4005,
michael@0 331 insn_popflags = 0x4006,
michael@0 332 insn_enter = 0x4007,
michael@0 333 insn_leave = 0x4008,
michael@0 334 /* insn_comparison */
michael@0 335 insn_test = 0x5001,
michael@0 336 insn_cmp = 0x5002,
michael@0 337 /* insn_move */
michael@0 338 insn_mov = 0x6001, /* move */
michael@0 339 insn_movcc = 0x6002, /* conditional move */
michael@0 340 insn_xchg = 0x6003, /* exchange */
michael@0 341 insn_xchgcc = 0x6004, /* conditional exchange */
michael@0 342 /* insn_string */
michael@0 343 insn_strcmp = 0x7001,
michael@0 344 insn_strload = 0x7002,
michael@0 345 insn_strmov = 0x7003,
michael@0 346 insn_strstore = 0x7004,
michael@0 347 insn_translate = 0x7005, /* xlat */
michael@0 348 /* insn_bit_manip */
michael@0 349 insn_bittest = 0x8001,
michael@0 350 insn_bitset = 0x8002,
michael@0 351 insn_bitclear = 0x8003,
michael@0 352 /* insn_flag_manip */
michael@0 353 insn_clear_carry = 0x9001,
michael@0 354 insn_clear_zero = 0x9002,
michael@0 355 insn_clear_oflow = 0x9003,
michael@0 356 insn_clear_dir = 0x9004,
michael@0 357 insn_clear_sign = 0x9005,
michael@0 358 insn_clear_parity = 0x9006,
michael@0 359 insn_set_carry = 0x9007,
michael@0 360 insn_set_zero = 0x9008,
michael@0 361 insn_set_oflow = 0x9009,
michael@0 362 insn_set_dir = 0x900A,
michael@0 363 insn_set_sign = 0x900B,
michael@0 364 insn_set_parity = 0x900C,
michael@0 365 insn_tog_carry = 0x9010,
michael@0 366 insn_tog_zero = 0x9020,
michael@0 367 insn_tog_oflow = 0x9030,
michael@0 368 insn_tog_dir = 0x9040,
michael@0 369 insn_tog_sign = 0x9050,
michael@0 370 insn_tog_parity = 0x9060,
michael@0 371 /* insn_fpu */
michael@0 372 insn_fmov = 0xA001,
michael@0 373 insn_fmovcc = 0xA002,
michael@0 374 insn_fneg = 0xA003,
michael@0 375 insn_fabs = 0xA004,
michael@0 376 insn_fadd = 0xA005,
michael@0 377 insn_fsub = 0xA006,
michael@0 378 insn_fmul = 0xA007,
michael@0 379 insn_fdiv = 0xA008,
michael@0 380 insn_fsqrt = 0xA009,
michael@0 381 insn_fcmp = 0xA00A,
michael@0 382 insn_fcos = 0xA00C,
michael@0 383 insn_fldpi = 0xA00D,
michael@0 384 insn_fldz = 0xA00E,
michael@0 385 insn_ftan = 0xA00F,
michael@0 386 insn_fsine = 0xA010,
michael@0 387 insn_fsys = 0xA020,
michael@0 388 /* insn_interrupt */
michael@0 389 insn_int = 0xD001,
michael@0 390 insn_intcc = 0xD002, /* not present in x86 ISA */
michael@0 391 insn_iret = 0xD003,
michael@0 392 insn_bound = 0xD004,
michael@0 393 insn_debug = 0xD005,
michael@0 394 insn_trace = 0xD006,
michael@0 395 insn_invalid_op = 0xD007,
michael@0 396 insn_oflow = 0xD008,
michael@0 397 /* insn_system */
michael@0 398 insn_halt = 0xE001,
michael@0 399 insn_in = 0xE002, /* input from port/bus */
michael@0 400 insn_out = 0xE003, /* output to port/bus */
michael@0 401 insn_cpuid = 0xE004,
michael@0 402 /* insn_other */
michael@0 403 insn_nop = 0xF001,
michael@0 404 insn_bcdconv = 0xF002, /* convert to or from BCD */
michael@0 405 insn_szconv = 0xF003 /* change size of operand */
michael@0 406 };
michael@0 407
michael@0 408 /* These flags specify special characteristics of the instruction, such as
michael@0 409 * whether the inatruction is privileged or whether it serializes the
michael@0 410 * pipeline.
michael@0 411 * NOTE : These may not be accurate for all instructions; updates to the
michael@0 412 * opcode tables have not been completed. */
michael@0 413 enum x86_insn_note {
michael@0 414 insn_note_ring0 = 1, /* Only available in ring 0 */
michael@0 415 insn_note_smm = 2, /* "" in System Management Mode */
michael@0 416 insn_note_serial = 4, /* Serializing instruction */
michael@0 417 insn_note_nonswap = 8, /* Does not swap arguments in att-style formatting */
michael@0 418 insn_note_nosuffix = 16, /* Does not have size suffix in att-style formatting */
michael@0 419 };
michael@0 420
michael@0 421 /* This specifies what effects the instruction has on the %eflags register */
michael@0 422 enum x86_flag_status {
michael@0 423 insn_carry_set = 0x1, /* CF */
michael@0 424 insn_zero_set = 0x2, /* ZF */
michael@0 425 insn_oflow_set = 0x4, /* OF */
michael@0 426 insn_dir_set = 0x8, /* DF */
michael@0 427 insn_sign_set = 0x10, /* SF */
michael@0 428 insn_parity_set = 0x20, /* PF */
michael@0 429 insn_carry_or_zero_set = 0x40,
michael@0 430 insn_zero_set_or_sign_ne_oflow = 0x80,
michael@0 431 insn_carry_clear = 0x100,
michael@0 432 insn_zero_clear = 0x200,
michael@0 433 insn_oflow_clear = 0x400,
michael@0 434 insn_dir_clear = 0x800,
michael@0 435 insn_sign_clear = 0x1000,
michael@0 436 insn_parity_clear = 0x2000,
michael@0 437 insn_sign_eq_oflow = 0x4000,
michael@0 438 insn_sign_ne_oflow = 0x8000
michael@0 439 };
michael@0 440
michael@0 441 /* The CPU model in which the insturction first appeared; this can be used
michael@0 442 * to mask out instructions appearing in earlier or later models or to
michael@0 443 * check the portability of a binary.
michael@0 444 * NOTE : These may not be accurate for all instructions; updates to the
michael@0 445 * opcode tables have not been completed. */
michael@0 446 enum x86_insn_cpu {
michael@0 447 cpu_8086 = 1, /* Intel */
michael@0 448 cpu_80286 = 2,
michael@0 449 cpu_80386 = 3,
michael@0 450 cpu_80387 = 4,
michael@0 451 cpu_80486 = 5,
michael@0 452 cpu_pentium = 6,
michael@0 453 cpu_pentiumpro = 7,
michael@0 454 cpu_pentium2 = 8,
michael@0 455 cpu_pentium3 = 9,
michael@0 456 cpu_pentium4 = 10,
michael@0 457 cpu_k6 = 16, /* AMD */
michael@0 458 cpu_k7 = 32,
michael@0 459 cpu_athlon = 48
michael@0 460 };
michael@0 461
michael@0 462 /* CPU ISA subsets: These are derived from the Instruction Groups in
michael@0 463 * Intel Vol 1 Chapter 5; they represent subsets of the IA32 ISA but
michael@0 464 * do not reflect the 'type' of the instruction in the same way that
michael@0 465 * x86_insn_group does. In short, these are AMD/Intel's somewhat useless
michael@0 466 * designations.
michael@0 467 * NOTE : These may not be accurate for all instructions; updates to the
michael@0 468 * opcode tables have not been completed. */
michael@0 469 enum x86_insn_isa {
michael@0 470 isa_gp = 1, /* general purpose */
michael@0 471 isa_fp = 2, /* floating point */
michael@0 472 isa_fpumgt = 3, /* FPU/SIMD management */
michael@0 473 isa_mmx = 4, /* Intel MMX */
michael@0 474 isa_sse1 = 5, /* Intel SSE SIMD */
michael@0 475 isa_sse2 = 6, /* Intel SSE2 SIMD */
michael@0 476 isa_sse3 = 7, /* Intel SSE3 SIMD */
michael@0 477 isa_3dnow = 8, /* AMD 3DNow! SIMD */
michael@0 478 isa_sys = 9 /* system instructions */
michael@0 479 };
michael@0 480
michael@0 481 enum x86_insn_prefix {
michael@0 482 insn_no_prefix = 0,
michael@0 483 insn_rep_zero = 1, /* REPZ and REPE */
michael@0 484 insn_rep_notzero = 2, /* REPNZ and REPNZ */
michael@0 485 insn_lock = 4 /* LOCK: */
michael@0 486 };
michael@0 487
michael@0 488 /* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */
michael@0 489 /* x86_insn_t : an X86 instruction */
michael@0 490 typedef struct {
michael@0 491 /* information about the instruction */
michael@0 492 uint32_t addr; /* load address */
michael@0 493 uint32_t offset; /* offset into file/buffer */
michael@0 494 enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */
michael@0 495 enum x86_insn_type type; /* type, e.g. INS_BRANCH */
michael@0 496 enum x86_insn_note note; /* note, e.g. RING0 */
michael@0 497 unsigned char bytes[MAX_INSN_SIZE];
michael@0 498 unsigned char size; /* size of insn in bytes */
michael@0 499 /* 16/32-bit mode settings */
michael@0 500 unsigned char addr_size; /* default address size : 2 or 4 */
michael@0 501 unsigned char op_size; /* default operand size : 2 or 4 */
michael@0 502 /* CPU/instruction set */
michael@0 503 enum x86_insn_cpu cpu;
michael@0 504 enum x86_insn_isa isa;
michael@0 505 /* flags */
michael@0 506 enum x86_flag_status flags_set; /* flags set or tested by insn */
michael@0 507 enum x86_flag_status flags_tested;
michael@0 508 /* stack */
michael@0 509 unsigned char stack_mod; /* 0 or 1 : is the stack modified? */
michael@0 510 int32_t stack_mod_val; /* val stack is modified by if known */
michael@0 511
michael@0 512 /* the instruction proper */
michael@0 513 enum x86_insn_prefix prefix; /* prefixes ORed together */
michael@0 514 char prefix_string[MAX_PREFIX_STR]; /* prefixes [might be truncated] */
michael@0 515 char mnemonic[MAX_MNEM_STR];
michael@0 516 x86_oplist_t *operands; /* list of explicit/implicit operands */
michael@0 517 size_t operand_count; /* total number of operands */
michael@0 518 size_t explicit_count; /* number of explicit operands */
michael@0 519 /* convenience fields for user */
michael@0 520 void *block; /* code block containing this insn */
michael@0 521 void *function; /* function containing this insn */
michael@0 522 int tag; /* tag the insn as seen/processed */
michael@0 523 } x86_insn_t;
michael@0 524
michael@0 525
michael@0 526 /* returns 0 if an instruction is invalid, 1 if valid */
michael@0 527 int x86_insn_is_valid( x86_insn_t *insn );
michael@0 528
michael@0 529 /* DISASSEMBLY ROUTINES
michael@0 530 * Canonical order of arguments is
michael@0 531 * (buf, buf_len, buf_rva, offset, len, insn, func, arg, resolve_func)
michael@0 532 * ...but of course all of these are not used at the same time.
michael@0 533 */
michael@0 534
michael@0 535
michael@0 536 /* Function prototype for caller-supplied callback routine
michael@0 537 * These callbacks are intended to process 'insn' further, e.g. by
michael@0 538 * adding it to a linked list, database, etc */
michael@0 539 typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void * arg );
michael@0 540
michael@0 541 /* Function prototype for caller-supplied address resolver.
michael@0 542 * This routine is used to determine the rva to disassemble next, given
michael@0 543 * the 'dest' operand of a jump/call. This allows the caller to resolve
michael@0 544 * jump/call targets stored in a register or on the stack, and also allows
michael@0 545 * the caller to prevent endless loops by checking if an address has
michael@0 546 * already been disassembled. If an address cannot be resolved from the
michael@0 547 * operand, or if the address has already been disassembled, this routine
michael@0 548 * should return -1; in all other cases the RVA to be disassembled next
michael@0 549 * should be returned. */
michael@0 550 typedef int32_t (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t * current_insn,
michael@0 551 void *arg );
michael@0 552
michael@0 553
michael@0 554 /* x86_disasm: Disassemble a single instruction from a buffer of bytes.
michael@0 555 * Returns size of instruction in bytes.
michael@0 556 * Caller is responsible for calling x86_oplist_free() on
michael@0 557 * a reused "insn" to avoid leaking memory when calling this
michael@0 558 * function repeatedly.
michael@0 559 * buf : Buffer of bytes to disassemble
michael@0 560 * buf_len : Length of the buffer
michael@0 561 * buf_rva : Load address of the start of the buffer
michael@0 562 * offset : Offset in buffer to disassemble
michael@0 563 * insn : Structure to fill with disassembled instruction
michael@0 564 */
michael@0 565 unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len,
michael@0 566 uint32_t buf_rva, unsigned int offset,
michael@0 567 x86_insn_t * insn );
michael@0 568
michael@0 569 /* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer,
michael@0 570 * invoking a callback function each time an instruction
michael@0 571 * is successfully disassembled. The 'range' refers to the
michael@0 572 * bytes between 'offset' and 'offset + len' in the buffer;
michael@0 573 * 'len' is assumed to be less than the length of the buffer.
michael@0 574 * Returns number of instructions processed.
michael@0 575 * buf : Buffer of bytes to disassemble (e.g. .text section)
michael@0 576 * buf_rva : Load address of buffer (e.g. ELF Virtual Address)
michael@0 577 * offset : Offset in buffer to start disassembly at
michael@0 578 * len : Number of bytes to disassemble
michael@0 579 * func : Callback function to invoke (may be NULL)
michael@0 580 * arg : Arbitrary data to pass to callback (may be NULL)
michael@0 581 */
michael@0 582 unsigned int x86_disasm_range( unsigned char *buf, uint32_t buf_rva,
michael@0 583 unsigned int offset, unsigned int len,
michael@0 584 DISASM_CALLBACK func, void *arg );
michael@0 585
michael@0 586 /* x86_disasm_forward: Flow-of-execution disassembly of the bytes in a buffer,
michael@0 587 * invoking a callback function each time an instruction
michael@0 588 * is successfully disassembled.
michael@0 589 * buf : Buffer to disassemble (e.g. .text section)
michael@0 590 * buf_len : Number of bytes in buffer
michael@0 591 * buf_rva : Load address of buffer (e.g. ELF Virtual Address)
michael@0 592 * offset : Offset in buffer to start disassembly at (e.g. entry point)
michael@0 593 * func : Callback function to invoke (may be NULL)
michael@0 594 * arg : Arbitrary data to pass to callback (may be NULL)
michael@0 595 * resolver: Caller-supplied address resolver. If no resolver is
michael@0 596 * supplied, a default internal one is used -- however the
michael@0 597 * internal resolver does NOT catch loops and could end up
michael@0 598 * disassembling forever..
michael@0 599 * r_arg : Arbitrary data to pass to resolver (may be NULL)
michael@0 600 */
michael@0 601 unsigned int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
michael@0 602 uint32_t buf_rva, unsigned int offset,
michael@0 603 DISASM_CALLBACK func, void *arg,
michael@0 604 DISASM_RESOLVER resolver, void *r_arg );
michael@0 605
michael@0 606 /* Instruction operands: these are stored as a list of explicit and
michael@0 607 * implicit operands. It is recommended that the 'foreach' routines
michael@0 608 * be used to when examining operands for purposes of data flow analysis */
michael@0 609
michael@0 610 /* Operand FOREACH callback: 'arg' is an abritrary parameter passed to the
michael@0 611 * foreach routine, 'insn' is the x86_insn_t whose operands are being
michael@0 612 * iterated over, and 'op' is the current x86_op_t */
michael@0 613 typedef void (*x86_operand_fn)(x86_op_t *op, x86_insn_t *insn, void *arg);
michael@0 614
michael@0 615 /* FOREACH types: these are used to limit the foreach results to
michael@0 616 * operands which match a certain "type" (implicit or explicit)
michael@0 617 * or which are accessed in certain ways (e.g. read or write). Note
michael@0 618 * that this operates on the operand list of single instruction, so
michael@0 619 * specifying the 'real' operand type (register, memory, etc) is not
michael@0 620 * useful. Note also that by definition Execute Access implies Read
michael@0 621 * Access and implies Not Write Access.
michael@0 622 * The "type" (implicit or explicit) and the access method can
michael@0 623 * be ORed together, e.g. op_wo | op_explicit */
michael@0 624 enum x86_op_foreach_type {
michael@0 625 op_any = 0, /* ALL operands (explicit, implicit, rwx) */
michael@0 626 op_dest = 1, /* operands with Write access */
michael@0 627 op_src = 2, /* operands with Read access */
michael@0 628 op_ro = 3, /* operands with Read but not Write access */
michael@0 629 op_wo = 4, /* operands with Write but not Read access */
michael@0 630 op_xo = 5, /* operands with Execute access */
michael@0 631 op_rw = 6, /* operands with Read AND Write access */
michael@0 632 op_implicit = 0x10, /* operands that are implied by the opcode */
michael@0 633 op_explicit = 0x20 /* operands that are not side-effects */
michael@0 634 };
michael@0 635
michael@0 636
michael@0 637 /* free the operand list associated with an instruction -- useful for
michael@0 638 * preventing memory leaks when free()ing an x86_insn_t */
michael@0 639 void x86_oplist_free( x86_insn_t *insn );
michael@0 640
michael@0 641 /* Operand foreach: invokes 'func' with 'insn' and 'arg' as arguments. The
michael@0 642 * 'type' parameter is used to select only operands matching specific
michael@0 643 * criteria. */
michael@0 644 int x86_operand_foreach( x86_insn_t *insn, x86_operand_fn func, void *arg,
michael@0 645 enum x86_op_foreach_type type);
michael@0 646
michael@0 647 /* convenience routine: returns count of operands matching 'type' */
michael@0 648 size_t x86_operand_count( x86_insn_t *insn, enum x86_op_foreach_type type );
michael@0 649
michael@0 650 /* accessor functions for the operands */
michael@0 651 x86_op_t * x86_operand_1st( x86_insn_t *insn );
michael@0 652 x86_op_t * x86_operand_2nd( x86_insn_t *insn );
michael@0 653 x86_op_t * x86_operand_3rd( x86_insn_t *insn );
michael@0 654
michael@0 655 /* these allow libdisasm 2.0 accessor functions to still be used */
michael@0 656 #define x86_get_dest_operand( insn ) x86_operand_1st( insn )
michael@0 657 #define x86_get_src_operand( insn ) x86_operand_2nd( insn )
michael@0 658 #define x86_get_imm_operand( insn ) x86_operand_3rd( insn )
michael@0 659
michael@0 660 /* get size of operand data in bytes */
michael@0 661 unsigned int x86_operand_size( x86_op_t *op );
michael@0 662
michael@0 663 /* Operand Convenience Routines: the following three routines are common
michael@0 664 * operations on operands, intended to ease the burden of the programmer. */
michael@0 665
michael@0 666 /* Get Address: return the value of an offset operand, or the offset of
michael@0 667 * a segment:offset absolute address */
michael@0 668 uint32_t x86_get_address( x86_insn_t *insn );
michael@0 669
michael@0 670 /* Get Relative Offset: return as a sign-extended int32_t the near or far
michael@0 671 * relative offset operand, or 0 if there is none. There can be only one
michael@0 672 * relaive offset operand in an instruction. */
michael@0 673 int32_t x86_get_rel_offset( x86_insn_t *insn );
michael@0 674
michael@0 675 /* Get Branch Target: return the x86_op_t containing the target of
michael@0 676 * a jump or call operand, or NULL if there is no branch target.
michael@0 677 * Internally, a 'branch target' is defined as any operand with
michael@0 678 * Execute Access set. There can be only one branch target per instruction. */
michael@0 679 x86_op_t * x86_get_branch_target( x86_insn_t *insn );
michael@0 680
michael@0 681 /* Get Immediate: return the x86_op_t containing the immediate operand
michael@0 682 * for this instruction, or NULL if there is no immediate operand. There
michael@0 683 * can be only one immediate operand per instruction */
michael@0 684 x86_op_t * x86_get_imm( x86_insn_t *insn );
michael@0 685
michael@0 686 /* Get Raw Immediate Data: returns a pointer to the immediate data encoded
michael@0 687 * in the instruction. This is useful for large data types [>32 bits] currently
michael@0 688 * not supported by libdisasm, or for determining if the disassembler
michael@0 689 * screwed up the conversion of the immediate data. Note that 'imm' in this
michael@0 690 * context refers to immediate data encoded at the end of an instruction as
michael@0 691 * detailed in the Intel Manual Vol II Chapter 2; it does not refer to the
michael@0 692 * 'op_imm' operand (the third operand in instructions like 'mul' */
michael@0 693 unsigned char * x86_get_raw_imm( x86_insn_t *insn );
michael@0 694
michael@0 695
michael@0 696 /* More accessor fuctions, this time for user-defined info... */
michael@0 697 /* set the address (usually RVA) of the insn */
michael@0 698 void x86_set_insn_addr( x86_insn_t *insn, uint32_t addr );
michael@0 699
michael@0 700 /* set the offset (usually offset into file) of the insn */
michael@0 701 void x86_set_insn_offset( x86_insn_t *insn, unsigned int offset );
michael@0 702
michael@0 703 /* set a pointer to the function owning the instruction. The
michael@0 704 * type of 'func' is user-defined; libdisasm does not use the func field. */
michael@0 705 void x86_set_insn_function( x86_insn_t *insn, void * func );
michael@0 706
michael@0 707 /* set a pointer to the block of code owning the instruction. The
michael@0 708 * type of 'block' is user-defined; libdisasm does not use the block field. */
michael@0 709 void x86_set_insn_block( x86_insn_t *insn, void * block );
michael@0 710
michael@0 711 /* instruction tagging: these routines allow the programmer to mark
michael@0 712 * instructions as "seen" in a DFS, for example. libdisasm does not use
michael@0 713 * the tag field.*/
michael@0 714 /* set insn->tag to 1 */
michael@0 715 void x86_tag_insn( x86_insn_t *insn );
michael@0 716 /* set insn->tag to 0 */
michael@0 717 void x86_untag_insn( x86_insn_t *insn );
michael@0 718 /* return insn->tag */
michael@0 719 int x86_insn_is_tagged( x86_insn_t *insn );
michael@0 720
michael@0 721
michael@0 722 /* Disassembly formats:
michael@0 723 * AT&T is standard AS/GAS-style: "mnemonic\tsrc, dest, imm"
michael@0 724 * Intel is standard MASM/NASM/TASM: "mnemonic\tdest,src, imm"
michael@0 725 * Native is tab-delimited: "RVA\tbytes\tmnemonic\tdest\tsrc\timm"
michael@0 726 * XML is your typical <insn> ... </insn>
michael@0 727 * Raw is addr|offset|size|bytes|prefix... see libdisasm_formats.7
michael@0 728 */
michael@0 729 enum x86_asm_format {
michael@0 730 unknown_syntax = 0, /* never use! */
michael@0 731 native_syntax, /* header: 35 bytes */
michael@0 732 intel_syntax, /* header: 23 bytes */
michael@0 733 att_syntax, /* header: 23 bytes */
michael@0 734 xml_syntax, /* header: 679 bytes */
michael@0 735 raw_syntax /* header: 172 bytes */
michael@0 736 };
michael@0 737
michael@0 738 /* format (sprintf) an operand into 'buf' using specified syntax */
michael@0 739 int x86_format_operand(x86_op_t *op, char *buf, int len,
michael@0 740 enum x86_asm_format format);
michael@0 741
michael@0 742 /* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
michael@0 743 int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
michael@0 744 enum x86_asm_format format);
michael@0 745
michael@0 746 /* format (sprintf) an instruction into 'buf' using specified syntax;
michael@0 747 * this includes formatting all operands */
michael@0 748 int x86_format_insn(x86_insn_t *insn, char *buf, int len, enum x86_asm_format);
michael@0 749
michael@0 750 /* fill 'buf' with a description of the format's syntax */
michael@0 751 int x86_format_header( char *buf, int len, enum x86_asm_format format);
michael@0 752
michael@0 753 /* Endianness of an x86 CPU : 0 is big, 1 is little; always returns 1 */
michael@0 754 unsigned int x86_endian(void);
michael@0 755
michael@0 756 /* Default address and operand size in bytes */
michael@0 757 unsigned int x86_addr_size(void);
michael@0 758 unsigned int x86_op_size(void);
michael@0 759
michael@0 760 /* Size of a machine word in bytes */
michael@0 761 unsigned int x86_word_size(void);
michael@0 762
michael@0 763 /* maximum size of a code instruction */
michael@0 764 #define x86_max_inst_size(x) x86_max_insn_size(x)
michael@0 765 unsigned int x86_max_insn_size(void);
michael@0 766
michael@0 767 /* register IDs of Stack, Frame, Instruction pointer and Flags register */
michael@0 768 unsigned int x86_sp_reg(void);
michael@0 769 unsigned int x86_fp_reg(void);
michael@0 770 unsigned int x86_ip_reg(void);
michael@0 771 unsigned int x86_flag_reg(void);
michael@0 772
michael@0 773 /* fill 'reg' struct with details of register 'id' */
michael@0 774 void x86_reg_from_id( unsigned int id, x86_reg_t * reg );
michael@0 775
michael@0 776 /* convenience macro demonstrating how to get an aliased register; proto is
michael@0 777 * void x86_get_aliased_reg( x86_reg_t *alias_reg, x86_reg_t *output_reg )
michael@0 778 * where 'alias_reg' is a reg operand and 'output_reg' is filled with the
michael@0 779 * register that the operand is an alias for */
michael@0 780 #define x86_get_aliased_reg( alias_reg, output_reg ) \
michael@0 781 x86_reg_from_id( alias_reg->alias, output_reg )
michael@0 782
michael@0 783
michael@0 784 /* ================================== Invariant Instruction Representation */
michael@0 785 /* Invariant instructions are used for generating binary signatures;
michael@0 786 * the instruction is modified so that all variant bytes in an instruction
michael@0 787 * are replaced with a wildcard byte.
michael@0 788 *
michael@0 789 * A 'variant byte' is one that is expected to be modified by either the
michael@0 790 * static or the dynamic linker: for example, an address encoded in an
michael@0 791 * instruction.
michael@0 792 *
michael@0 793 * By comparing the invariant representation of one instruction [or of a
michael@0 794 * sequence of instructions] with the invariant representation of another,
michael@0 795 * one determine whether the two invariant representations are from the same
michael@0 796 * relocatable object [.o] file. Thus one can use binary signatures [which
michael@0 797 * are just sequences of invariant instruction representations] to look for
michael@0 798 * library routines which have been statically-linked into a binary.
michael@0 799 *
michael@0 800 * The invariant routines are faster and smaller than the disassembly
michael@0 801 * routines; they can be used to determine the size of an instruction
michael@0 802 * without all of the overhead of a full instruction disassembly.
michael@0 803 */
michael@0 804
michael@0 805 /* This byte is used to replace variant bytes */
michael@0 806 #define X86_WILDCARD_BYTE 0xF4
michael@0 807
michael@0 808 typedef struct {
michael@0 809 enum x86_op_type type; /* operand type */
michael@0 810 enum x86_op_datatype datatype; /* operand size */
michael@0 811 enum x86_op_access access; /* operand access [RWX] */
michael@0 812 enum x86_op_flags flags; /* misc flags */
michael@0 813 } x86_invariant_op_t;
michael@0 814
michael@0 815 typedef struct {
michael@0 816 unsigned char bytes[64]; /* invariant representation */
michael@0 817 unsigned int size; /* number of bytes in insn */
michael@0 818 enum x86_insn_group group; /* meta-type, e.g. INS_EXEC */
michael@0 819 enum x86_insn_type type; /* type, e.g. INS_BRANCH */
michael@0 820 x86_invariant_op_t operands[3]; /* operands: dest, src, imm */
michael@0 821 } x86_invariant_t;
michael@0 822
michael@0 823
michael@0 824 /* return a version of the instruction with the variant bytes masked out */
michael@0 825 size_t x86_invariant_disasm( unsigned char *buf, int buf_len,
michael@0 826 x86_invariant_t *inv );
michael@0 827 /* return the size in bytes of the intruction pointed to by 'buf';
michael@0 828 * this used x86_invariant_disasm since it faster than x86_disasm */
michael@0 829 size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len );
michael@0 830
michael@0 831 #ifdef __cplusplus
michael@0 832 }
michael@0 833 #endif
michael@0 834
michael@0 835
michael@0 836 #endif

mercurial