toolkit/crashreporter/google-breakpad/src/third_party/libdisasm/libdis.h

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/toolkit/crashreporter/google-breakpad/src/third_party/libdisasm/libdis.h	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,836 @@
     1.4 +#ifndef LIBDISASM_H
     1.5 +#define LIBDISASM_H
     1.6 +
     1.7 +#ifdef WIN32
     1.8 +#include <windows.h>
     1.9 +#endif
    1.10 +
    1.11 +#include <stdint.h>
    1.12 +
    1.13 +/* "NEW" types
    1.14 + * __________________________________________________________________________*/
    1.15 +#ifndef LIBDISASM_QWORD_H       /* do not interfere with qword.h */
    1.16 +        #define LIBDISASM_QWORD_H
    1.17 +        #ifdef _MSC_VER
    1.18 +                typedef __int64         qword_t;
    1.19 +        #else
    1.20 +                typedef int64_t         qword_t;
    1.21 +        #endif
    1.22 +#endif
    1.23 +
    1.24 +#include <sys/types.h>
    1.25 +
    1.26 +#ifdef __cplusplus
    1.27 +extern "C" {
    1.28 +#endif
    1.29 +
    1.30 +/* "NEW" x86 API
    1.31 + * __________________________________________________________________________*/
    1.32 +
    1.33 +
    1.34 +/* ========================================= Error Reporting */
    1.35 +/* REPORT CODES
    1.36 + *      These are passed to a reporter function passed at initialization.
    1.37 + *      Each code determines the type of the argument passed to the reporter;
    1.38 + *      this allows the reporter to recover from errors, or just log them.
    1.39 + */
    1.40 +enum x86_report_codes {
    1.41 +        report_disasm_bounds,   /* RVA OUT OF BOUNDS : The disassembler could
    1.42 +                                   not disassemble the supplied RVA as it is
    1.43 +                                   out of the range of the buffer. The
    1.44 +                                   application should store the address and
    1.45 +                                   attempt to determine what section of the
    1.46 +                                   binary it is in, then disassemble the
    1.47 +                                   address from the bytes in that section.
    1.48 +                                        data: uint32_t rva */
    1.49 +        report_insn_bounds,     /* INSTRUCTION OUT OF BOUNDS: The disassembler
    1.50 +                                   could not disassemble the instruction as
    1.51 +                                   the instruction would require bytes beyond
    1.52 +                                   the end of the current buffer. This usually
    1.53 +                                   indicates garbage bytes at the end of a
    1.54 +                                   buffer, or an incorrectly-sized buffer.
    1.55 +                                        data: uint32_t rva */
    1.56 +        report_invalid_insn,    /* INVALID INSTRUCTION: The disassembler could
    1.57 +                                   not disassemble the instruction as it has an
    1.58 +                                   invalid combination of opcodes and operands.
    1.59 +                                   This will stop automated disassembly; the
    1.60 +                                   application can restart the disassembly
    1.61 +                                   after the invalid instruction.
    1.62 +                                        data: uint32_t rva */
    1.63 +        report_unknown
    1.64 +};
    1.65 +
    1.66 +/* 'arg' is optional arbitrary data provided by the code passing the 
    1.67 + *       callback -- for example, it could be 'this' or 'self' in OOP code.
    1.68 + * 'code' is provided by libdisasm, it is one of the above
    1.69 + * 'data' is provided by libdisasm and is context-specific, per the enums */
    1.70 +typedef void (*DISASM_REPORTER)( enum x86_report_codes code, 
    1.71 +				 void *data, void *arg );
    1.72 +
    1.73 +
    1.74 +/* x86_report_error : Call the registered reporter to report an error */
    1.75 +void x86_report_error( enum x86_report_codes code, void *data );
    1.76 +
    1.77 +/* ========================================= Libdisasm Management Routines */
    1.78 +enum x86_options {		/* these can be ORed together */
    1.79 +        opt_none= 0,
    1.80 +        opt_ignore_nulls=1,     /* ignore sequences of > 4 NULL bytes */
    1.81 +        opt_16_bit=2,           /* 16-bit/DOS disassembly */
    1.82 +        opt_att_mnemonics=4,    /* use AT&T syntax names for alternate opcode mnemonics */
    1.83 +};
    1.84 +
    1.85 +/* management routines */
    1.86 +/* 'arg' is caller-specific data which is passed as the last argument
    1.87 + * to the reporter callback routine */
    1.88 +int x86_init( enum x86_options options, DISASM_REPORTER reporter, void *arg);
    1.89 +void x86_set_reporter( DISASM_REPORTER reporter, void *arg);
    1.90 +void x86_set_options( enum x86_options options );
    1.91 +enum x86_options x86_get_options( void );
    1.92 +int x86_cleanup(void);
    1.93 +
    1.94 +
    1.95 +/* ========================================= Instruction Representation */
    1.96 +/* these defines are only intended for use in the array decl's */
    1.97 +#define MAX_REGNAME 8
    1.98 +
    1.99 +#define MAX_PREFIX_STR 32
   1.100 +#define MAX_MNEM_STR 16
   1.101 +#define MAX_INSN_SIZE 20        /* same as in i386.h */
   1.102 +#define MAX_OP_STRING 32        /* max possible operand size in string form */
   1.103 +#define MAX_OP_RAW_STRING 64    /* max possible operand size in raw form */
   1.104 +#define MAX_OP_XML_STRING 256   /* max possible operand size in xml form */
   1.105 +#define MAX_NUM_OPERANDS 8	/* max # implicit and explicit operands */
   1.106 +/* in these, the '2 *' is arbitrary: the max # of operands should require
   1.107 + * more space than the rest of the insn */
   1.108 +#define MAX_INSN_STRING 512        /* 2 * 8 * MAX_OP_STRING */
   1.109 +#define MAX_INSN_RAW_STRING 1024   /* 2 * 8 * MAX_OP_RAW_STRING */
   1.110 +#define MAX_INSN_XML_STRING 4096   /* 2 * 8 * MAX_OP_XML_STRING */
   1.111 +
   1.112 +enum x86_reg_type {     /* NOTE: these may be ORed together */
   1.113 +        reg_gen         = 0x00001,      /* general purpose */
   1.114 +        reg_in          = 0x00002,      /* incoming args, ala RISC */
   1.115 +        reg_out         = 0x00004,      /* args to calls, ala RISC */
   1.116 +        reg_local       = 0x00008,      /* local vars, ala RISC */
   1.117 +        reg_fpu         = 0x00010,      /* FPU data register */
   1.118 +        reg_seg         = 0x00020,      /* segment register */
   1.119 +        reg_simd        = 0x00040,      /* SIMD/MMX reg */
   1.120 +        reg_sys         = 0x00080,      /* restricted/system register */
   1.121 +        reg_sp          = 0x00100,      /* stack pointer */
   1.122 +        reg_fp          = 0x00200,      /* frame pointer */
   1.123 +        reg_pc          = 0x00400,      /* program counter */
   1.124 +        reg_retaddr     = 0x00800,      /* return addr for func */
   1.125 +        reg_cond        = 0x01000,      /* condition code / flags */
   1.126 +        reg_zero        = 0x02000,      /* zero register, ala RISC */
   1.127 +        reg_ret         = 0x04000,      /* return value */
   1.128 +        reg_src         = 0x10000,      /* array/rep source */
   1.129 +        reg_dest        = 0x20000,      /* array/rep destination */
   1.130 +        reg_count       = 0x40000       /* array/rep/loop counter */
   1.131 +};
   1.132 +
   1.133 +/* x86_reg_t : an X86 CPU register */
   1.134 +typedef struct {
   1.135 +        char name[MAX_REGNAME];
   1.136 +        enum x86_reg_type type;         /* what register is used for */
   1.137 +        unsigned int size;              /* size of register in bytes */
   1.138 +        unsigned int id;                /* register ID #, for quick compares */
   1.139 +	unsigned int alias;		/* ID of reg this is an alias for */
   1.140 +	unsigned int shift;		/* amount to shift aliased reg by */
   1.141 +} x86_reg_t;
   1.142 +
   1.143 +/* x86_ea_t : an X86 effective address (address expression) */
   1.144 +typedef struct {
   1.145 +        unsigned int     scale;         /* scale factor */
   1.146 +        x86_reg_t        index, base;   /* index, base registers */
   1.147 +        int32_t          disp;          /* displacement */
   1.148 +        char             disp_sign;     /* is negative? 1/0 */
   1.149 +        char             disp_size;     /* 0, 1, 2, 4 */
   1.150 +} x86_ea_t;
   1.151 +
   1.152 +/* x86_absolute_t : an X86 segment:offset address (descriptor) */
   1.153 +typedef struct {
   1.154 +	unsigned short	segment;	/* loaded directly into CS */
   1.155 +	union {
   1.156 +		unsigned short	off16;	/* loaded directly into IP */
   1.157 +		uint32_t		off32;	/* loaded directly into EIP */
   1.158 +	} offset;	
   1.159 +} x86_absolute_t;
   1.160 +
   1.161 +enum x86_op_type {      /* mutually exclusive */
   1.162 +        op_unused = 0,          /* empty/unused operand: should never occur */
   1.163 +        op_register = 1,        /* CPU register */
   1.164 +        op_immediate = 2,       /* Immediate Value */
   1.165 +        op_relative_near = 3,   /* Relative offset from IP */
   1.166 +        op_relative_far = 4,    /* Relative offset from IP */
   1.167 +        op_absolute = 5,        /* Absolute address (ptr16:32) */
   1.168 +        op_expression = 6,      /* Address expression (scale/index/base/disp) */
   1.169 +        op_offset = 7,          /* Offset from start of segment (m32) */
   1.170 +        op_unknown
   1.171 +};
   1.172 +
   1.173 +#define x86_optype_is_address( optype ) \
   1.174 +	( (optype) == op_absolute || (optype) == op_offset )	/* absolute or segment-offset operand; 'optype' is evaluated twice */
   1.175 +#define x86_optype_is_relative( optype ) \
   1.176 +	( (optype) == op_relative_near || (optype) == op_relative_far )	/* IP-relative operand; 'optype' is evaluated twice */
   1.177 +#define x86_optype_is_memory( optype ) \
   1.178 +	( (optype) > op_immediate && (optype) < op_unknown )	/* any type strictly between op_immediate and op_unknown; 'optype' is evaluated twice */
   1.179 +
   1.180 +enum x86_op_datatype {          /* these use Intel's lame terminology */
   1.181 +        op_byte = 1,            /* 1 byte integer */
   1.182 +        op_word = 2,            /* 2 byte integer */
   1.183 +        op_dword = 3,           /* 4 byte integer */
   1.184 +        op_qword = 4,           /* 8 byte integer */
   1.185 +        op_dqword = 5,          /* 16 byte integer */
   1.186 +        op_sreal = 6,           /* 4 byte real (single real) */
   1.187 +        op_dreal = 7,           /* 8 byte real (double real) */
   1.188 +        op_extreal = 8,         /* 10 byte real (extended real) */
   1.189 +        op_bcd = 9,             /* 10 byte binary-coded decimal */
   1.190 +        op_ssimd = 10,          /* 16 byte : 4 packed single FP (SIMD, MMX) */
   1.191 +        op_dsimd = 11,          /* 16 byte : 2 packed double FP (SIMD, MMX) */
   1.192 +        op_sssimd = 12,         /* 4 byte : scalar single FP (SIMD, MMX) */
   1.193 +        op_sdsimd = 13,         /* 8 byte : scalar double FP (SIMD, MMX) */
   1.194 +	op_descr32 = 14,	/* 6 byte Intel descriptor 2:4 */
   1.195 +	op_descr16 = 15,	/* 4 byte Intel descriptor 2:2 */
   1.196 +	op_pdescr32 = 16,	/* 6 byte Intel pseudo-descriptor 32:16 */
   1.197 +	op_pdescr16 = 17,	/* 6 byte Intel pseudo-descriptor 8:24:16 */
   1.198 +	op_bounds16 = 18,	/* signed 16:16 lower:upper bounds */
   1.199 +	op_bounds32 = 19,	/* signed 32:32 lower:upper bounds */
   1.200 +        op_fpuenv16 = 20,	/* 14 byte FPU control/environment data */
   1.201 +        op_fpuenv32 = 21,	/* 28 byte FPU control/environment data */
   1.202 +	op_fpustate16 = 22,	/* 94 byte FPU state (env & reg stack) */
   1.203 +	op_fpustate32 = 23,	/* 108 byte FPU state (env & reg stack) */
   1.204 +	op_fpregset = 24,	/* 512 bytes: register set */
   1.205 +	op_fpreg = 25,		/* FPU register */
   1.206 +    op_none = 0xFF,     /* operand without a datatype (INVLPG) */
   1.207 +};
   1.208 +
   1.209 +enum x86_op_access {    /* ORed together */
   1.210 +        op_read = 1,
   1.211 +        op_write = 2,
   1.212 +        op_execute = 4
   1.213 +};
   1.214 +
   1.215 +enum x86_op_flags {     /* ORed together, but segs are mutually exclusive */
   1.216 +        op_signed = 1,          /* signed integer */
   1.217 +        op_string = 2,          /* possible string or array */
   1.218 +        op_constant = 4,        /* symbolic constant */
   1.219 +        op_pointer = 8,         /* operand points to a memory address */
   1.220 +	op_sysref = 0x010,	/* operand is a syscall number */
   1.221 +	op_implied = 0x020,	/* operand is implicit in the insn */
   1.222 +	op_hardcode = 0x40,	/* operand is hardcoded in insn definition */
   1.223 +	/* NOTE: an 'implied' operand is one which can be considered a side
   1.224 +	 * effect of the insn, e.g. %esp being modified by PUSH or POP. A
   1.225 +	 * 'hard-coded' operand is one which is specified in the instruction
   1.226 +	 * definition, e.g. %es:%edi in MOVSB or 1 in ROL Eb, 1. The difference
   1.227 +	 * is that hard-coded operands are printed by disassemblers and are
   1.228 +	 * required to re-assemble, while implicit operands are invisible. */
   1.229 +        op_es_seg = 0x100,      /* ES segment override */
   1.230 +        op_cs_seg = 0x200,      /* CS segment override */
   1.231 +        op_ss_seg = 0x300,      /* SS segment override */
   1.232 +        op_ds_seg = 0x400,      /* DS segment override */
   1.233 +        op_fs_seg = 0x500,      /* FS segment override */
   1.234 +        op_gs_seg = 0x600       /* GS segment override */
   1.235 +};
   1.236 +
   1.237 +/* x86_op_t : an X86 instruction operand */
   1.238 +typedef struct {
   1.239 +        enum x86_op_type        type;           /* operand type */
   1.240 +        enum x86_op_datatype    datatype;       /* operand size */
   1.241 +        enum x86_op_access      access;         /* operand access [RWX] */
   1.242 +        enum x86_op_flags       flags;          /* misc flags */
   1.243 +        union {
   1.244 +		/* sizeof will have to work on these union members! */
   1.245 +                /* immediate values */
   1.246 +                char            sbyte;
   1.247 +                short           sword;
   1.248 +                int32_t         sdword;
   1.249 +                qword_t         sqword;
   1.250 +                unsigned char   byte;
   1.251 +                unsigned short  word;
   1.252 +                uint32_t        dword;
   1.253 +                qword_t         qword;	/* NOTE: qword_t is a signed 64-bit typedef, so this has the same type as sqword */
   1.254 +                float           sreal;
   1.255 +                double          dreal;
   1.256 +                /* misc large/non-native types */
   1.257 +                unsigned char   extreal[10];
   1.258 +                unsigned char   bcd[10];
   1.259 +                qword_t         dqword[2];
   1.260 +                unsigned char   simd[16];
   1.261 +                unsigned char   fpuenv[28];
   1.262 +                /* offset from segment */
   1.263 +                uint32_t        offset;
   1.264 +                /* CPU register (a full x86_reg_t, not just its ID) */
   1.265 +                x86_reg_t       reg;
   1.266 +                /* offsets from current insn */
   1.267 +                char            relative_near;
   1.268 +                int32_t         relative_far;
   1.269 +		/* segment:offset */
   1.270 +		x86_absolute_t	absolute;
   1.271 +                /* effective address [expression] */
   1.272 +                x86_ea_t        expression;
   1.273 +        } data;
   1.274 +	/* this is needed to make formatting operands more sane */
   1.275 +	void * insn;		/* pointer to x86_insn_t owning operand */
   1.276 +} x86_op_t;
   1.277 +
   1.278 +/* Linked list of x86_op_t; provided for manual traversal of the operand
   1.279 + * list in an insn. Users wishing to add operands to this list, e.g. to add
   1.280 + * implicit operands, should use x86_operand_new in x86_operand_list.h */
   1.281 +typedef struct x86_operand_list {
   1.282 +	x86_op_t op;
   1.283 +	struct x86_operand_list *next;
   1.284 +} x86_oplist_t;
   1.285 +
   1.286 +enum x86_insn_group {
   1.287 +	insn_none = 0,		/* invalid instruction */
   1.288 +        insn_controlflow = 1,
   1.289 +        insn_arithmetic = 2,
   1.290 +        insn_logic = 3,
   1.291 +        insn_stack = 4,
   1.292 +        insn_comparison = 5,
   1.293 +        insn_move = 6,
   1.294 +        insn_string = 7,
   1.295 +        insn_bit_manip = 8,
   1.296 +        insn_flag_manip = 9,
   1.297 +        insn_fpu = 10,
   1.298 +        insn_interrupt = 13,
   1.299 +        insn_system = 14,
   1.300 +        insn_other = 15
   1.301 +};
   1.302 +
   1.303 +enum x86_insn_type {
   1.304 +	insn_invalid = 0,	/* invalid instruction */
   1.305 +        /* insn_controlflow */
   1.306 +        insn_jmp = 0x1001,
   1.307 +        insn_jcc = 0x1002,
   1.308 +        insn_call = 0x1003,
   1.309 +        insn_callcc = 0x1004,
   1.310 +        insn_return = 0x1005,
   1.311 +        /* insn_arithmetic */
   1.312 +        insn_add = 0x2001,
   1.313 +        insn_sub = 0x2002,
   1.314 +        insn_mul = 0x2003,
   1.315 +        insn_div = 0x2004,
   1.316 +        insn_inc = 0x2005,
   1.317 +        insn_dec = 0x2006,
   1.318 +        insn_shl = 0x2007,
   1.319 +        insn_shr = 0x2008,
   1.320 +        insn_rol = 0x2009,
   1.321 +        insn_ror = 0x200A,
   1.322 +        /* insn_logic */
   1.323 +        insn_and = 0x3001,
   1.324 +        insn_or = 0x3002,
   1.325 +        insn_xor = 0x3003,
   1.326 +        insn_not = 0x3004,
   1.327 +        insn_neg = 0x3005,
   1.328 +        /* insn_stack */
   1.329 +        insn_push = 0x4001,
   1.330 +        insn_pop = 0x4002,
   1.331 +        insn_pushregs = 0x4003,
   1.332 +        insn_popregs = 0x4004,
   1.333 +        insn_pushflags = 0x4005,
   1.334 +        insn_popflags = 0x4006,
   1.335 +        insn_enter = 0x4007,
   1.336 +        insn_leave = 0x4008,
   1.337 +        /* insn_comparison */
   1.338 +        insn_test = 0x5001,
   1.339 +        insn_cmp = 0x5002,
   1.340 +        /* insn_move */
   1.341 +        insn_mov = 0x6001,      /* move */
   1.342 +        insn_movcc = 0x6002,    /* conditional move */
   1.343 +        insn_xchg = 0x6003,     /* exchange */
   1.344 +        insn_xchgcc = 0x6004,   /* conditional exchange */
   1.345 +        /* insn_string */
   1.346 +        insn_strcmp = 0x7001,
   1.347 +        insn_strload = 0x7002,
   1.348 +        insn_strmov = 0x7003,
   1.349 +        insn_strstore = 0x7004,
   1.350 +        insn_translate = 0x7005,        /* xlat */
   1.351 +        /* insn_bit_manip */
   1.352 +        insn_bittest = 0x8001,
   1.353 +        insn_bitset = 0x8002,
   1.354 +        insn_bitclear = 0x8003,
   1.355 +        /* insn_flag_manip */
   1.356 +        insn_clear_carry = 0x9001,
   1.357 +        insn_clear_zero = 0x9002,
   1.358 +        insn_clear_oflow = 0x9003,
   1.359 +        insn_clear_dir = 0x9004,
   1.360 +        insn_clear_sign = 0x9005,
   1.361 +        insn_clear_parity = 0x9006,
   1.362 +        insn_set_carry = 0x9007,
   1.363 +        insn_set_zero = 0x9008,
   1.364 +        insn_set_oflow = 0x9009,
   1.365 +        insn_set_dir = 0x900A,
   1.366 +        insn_set_sign = 0x900B,
   1.367 +        insn_set_parity = 0x900C,
   1.368 +        insn_tog_carry = 0x9010,
   1.369 +        insn_tog_zero = 0x9020,
   1.370 +        insn_tog_oflow = 0x9030,
   1.371 +        insn_tog_dir = 0x9040,
   1.372 +        insn_tog_sign = 0x9050,
   1.373 +        insn_tog_parity = 0x9060,
   1.374 +        /* insn_fpu */
   1.375 +        insn_fmov = 0xA001,
   1.376 +        insn_fmovcc = 0xA002,
   1.377 +        insn_fneg = 0xA003,
   1.378 +        insn_fabs = 0xA004,
   1.379 +        insn_fadd = 0xA005,
   1.380 +        insn_fsub = 0xA006,
   1.381 +        insn_fmul = 0xA007,
   1.382 +        insn_fdiv = 0xA008,
   1.383 +        insn_fsqrt = 0xA009,
   1.384 +        insn_fcmp = 0xA00A,
   1.385 +        insn_fcos = 0xA00C,
   1.386 +        insn_fldpi = 0xA00D,
   1.387 +        insn_fldz = 0xA00E,
   1.388 +        insn_ftan = 0xA00F,
   1.389 +        insn_fsine = 0xA010,
   1.390 +        insn_fsys = 0xA020,
   1.391 +        /* insn_interrupt */
   1.392 +        insn_int = 0xD001,
   1.393 +        insn_intcc = 0xD002,    /* not present in x86 ISA */
   1.394 +        insn_iret = 0xD003,
   1.395 +        insn_bound = 0xD004,
   1.396 +        insn_debug = 0xD005,
   1.397 +        insn_trace = 0xD006,
   1.398 +        insn_invalid_op = 0xD007,
   1.399 +        insn_oflow = 0xD008,
   1.400 +        /* insn_system */
   1.401 +        insn_halt = 0xE001,
   1.402 +        insn_in = 0xE002,       /* input from port/bus */
   1.403 +        insn_out = 0xE003,      /* output to port/bus */
   1.404 +        insn_cpuid = 0xE004,
   1.405 +        /* insn_other */
   1.406 +        insn_nop = 0xF001,
   1.407 +        insn_bcdconv = 0xF002,  /* convert to or from BCD */
   1.408 +        insn_szconv = 0xF003    /* change size of operand */
   1.409 +};
   1.410 +
   1.411 +/* These flags specify special characteristics of the instruction, such as
   1.412 + * whether the instruction is privileged or whether it serializes the
   1.413 + * pipeline.
   1.414 + * NOTE : These may not be accurate for all instructions; updates to the
   1.415 + * opcode tables have not been completed. */
   1.416 +enum x86_insn_note {
   1.417 +	insn_note_ring0		= 1,	/* Only available in ring 0 */
   1.418 +	insn_note_smm		= 2,	/* "" in System Management Mode */
   1.419 +	insn_note_serial	= 4,	/* Serializing instruction */
   1.420 +	insn_note_nonswap	= 8,	/* Does not swap arguments in att-style formatting */
   1.421 +	insn_note_nosuffix  = 16,	/* Does not have size suffix in att-style formatting */
   1.422 +};
   1.423 +
   1.424 +/* This specifies what effects the instruction has on the %eflags register */
   1.425 +enum x86_flag_status {
   1.426 +        insn_carry_set = 0x1,			/* CF */
   1.427 +        insn_zero_set = 0x2,			/* ZF */
   1.428 +        insn_oflow_set = 0x4,			/* OF */
   1.429 +        insn_dir_set = 0x8,			/* DF */
   1.430 +        insn_sign_set = 0x10,			/* SF */
   1.431 +        insn_parity_set = 0x20,			/* PF */
   1.432 +        insn_carry_or_zero_set = 0x40,
   1.433 +        insn_zero_set_or_sign_ne_oflow = 0x80,
   1.434 +        insn_carry_clear = 0x100,
   1.435 +        insn_zero_clear = 0x200,
   1.436 +        insn_oflow_clear = 0x400,
   1.437 +        insn_dir_clear = 0x800,
   1.438 +        insn_sign_clear = 0x1000,
   1.439 +        insn_parity_clear = 0x2000,
   1.440 +        insn_sign_eq_oflow = 0x4000,
   1.441 +        insn_sign_ne_oflow = 0x8000
   1.442 +};
   1.443 +
   1.444 +/* The CPU model in which the instruction first appeared; this can be used
   1.445 + * to mask out instructions appearing in earlier or later models or to
   1.446 + * check the portability of a binary.
   1.447 + * NOTE : These may not be accurate for all instructions; updates to the
   1.448 + * opcode tables have not been completed. */
   1.449 +enum x86_insn_cpu {
   1.450 +	cpu_8086 	= 1,	/* Intel */
   1.451 +	cpu_80286	= 2,
   1.452 +	cpu_80386	= 3,
   1.453 +	cpu_80387	= 4,
   1.454 +	cpu_80486	= 5,
   1.455 +	cpu_pentium	= 6,
   1.456 +	cpu_pentiumpro	= 7,
   1.457 +	cpu_pentium2	= 8,
   1.458 +	cpu_pentium3	= 9,
   1.459 +	cpu_pentium4	= 10,
   1.460 +	cpu_k6		= 16,	/* AMD */
   1.461 +	cpu_k7		= 32,
   1.462 +	cpu_athlon	= 48
   1.463 +};
   1.464 +
   1.465 +/* CPU ISA subsets: These are derived from the Instruction Groups in
   1.466 + * Intel Vol 1 Chapter 5; they represent subsets of the IA32 ISA but
   1.467 + * do not reflect the 'type' of the instruction in the same way that
   1.468 + * x86_insn_group does. In short, these are AMD/Intel's somewhat useless 
   1.469 + * designations.
   1.470 + * NOTE : These may not be accurate for all instructions; updates to the
   1.471 + * opcode tables have not been completed. */
   1.472 +enum x86_insn_isa {
   1.473 +	isa_gp		= 1,	/* general purpose */
   1.474 +	isa_fp		= 2,	/* floating point */
   1.475 +	isa_fpumgt	= 3,	/* FPU/SIMD management */
   1.476 +	isa_mmx		= 4,	/* Intel MMX */
   1.477 +	isa_sse1	= 5,	/* Intel SSE SIMD */
   1.478 +	isa_sse2	= 6,	/* Intel SSE2 SIMD */
   1.479 +	isa_sse3	= 7,	/* Intel SSE3 SIMD */
   1.480 +	isa_3dnow	= 8,	/* AMD 3DNow! SIMD */
   1.481 +	isa_sys		= 9	/* system instructions */
   1.482 +};
   1.483 +
   1.484 +enum x86_insn_prefix {
   1.485 +        insn_no_prefix = 0,
   1.486 +        insn_rep_zero = 1,	/* REPZ and REPE */
   1.487 +        insn_rep_notzero = 2,	/* REPNZ and REPNE */
   1.488 +        insn_lock = 4		/* LOCK: */
   1.489 +};
   1.490 +
   1.491 +/* TODO: maybe provide insn_new/free(), and have disasm return new insn_t */
   1.492 +/* x86_insn_t : an X86 instruction */
   1.493 +typedef struct {
   1.494 +        /* information about the instruction */
   1.495 +        uint32_t addr;             /* load address */
   1.496 +        uint32_t offset;           /* offset into file/buffer */
   1.497 +        enum x86_insn_group group;      /* meta-type, e.g. insn_controlflow */
   1.498 +        enum x86_insn_type type;        /* type, e.g. insn_jmp */
   1.499 +	enum x86_insn_note note;	/* note, e.g. insn_note_ring0 */
   1.500 +        unsigned char bytes[MAX_INSN_SIZE];
   1.501 +        unsigned char size;             /* size of insn in bytes */
   1.502 +	/* 16/32-bit mode settings */
   1.503 +	unsigned char addr_size;	/* default address size : 2 or 4 */
   1.504 +	unsigned char op_size;		/* default operand size : 2 or 4 */
   1.505 +	/* CPU/instruction set */
   1.506 +	enum x86_insn_cpu cpu;
   1.507 +	enum x86_insn_isa isa;
   1.508 +	/* flags */
   1.509 +        enum x86_flag_status flags_set; /* flags set by insn */
   1.510 +        enum x86_flag_status flags_tested; /* flags tested by insn */
   1.511 +	/* stack */
   1.512 +	unsigned char stack_mod;	/* 0 or 1 : is the stack modified? */
   1.513 +	int32_t stack_mod_val;		/* val stack is modified by if known */
   1.514 +
   1.515 +        /* the instruction proper */
   1.516 +        enum x86_insn_prefix prefix;	/* prefixes ORed together */
   1.517 +        char prefix_string[MAX_PREFIX_STR]; /* prefixes [might be truncated] */
   1.518 +        char mnemonic[MAX_MNEM_STR];
   1.519 +        x86_oplist_t *operands;		/* list of explicit/implicit operands */
   1.520 +	size_t operand_count;		/* total number of operands */
   1.521 +	size_t explicit_count;		/* number of explicit operands */
   1.522 +        /* convenience fields for user */
   1.523 +        void *block;                    /* code block containing this insn */
   1.524 +        void *function;                 /* function containing this insn */
   1.525 +        int tag;			/* tag the insn as seen/processed */
   1.526 +} x86_insn_t;
   1.527 +
   1.528 +
   1.529 +/* returns 0 if an instruction is invalid, 1 if valid */
   1.530 +int x86_insn_is_valid( x86_insn_t *insn );
   1.531 +
   1.532 +/* DISASSEMBLY ROUTINES
   1.533 + *      Canonical order of arguments is
   1.534 + *        (buf, buf_len, buf_rva, offset, len, insn, func, arg, resolve_func)
   1.535 + *      ...but of course all of these are not used at the same time.
   1.536 + */
   1.537 +
   1.538 +
   1.539 +/* Function prototype for caller-supplied callback routine
   1.540 + *      These callbacks are intended to process 'insn' further, e.g. by
   1.541 + *      adding it to a linked list, database, etc */
   1.542 +typedef void (*DISASM_CALLBACK)( x86_insn_t *insn, void * arg );
   1.543 +
   1.544 +/* Function prototype for caller-supplied address resolver.
   1.545 + *      This routine is used to determine the rva to disassemble next, given
   1.546 + *      the 'dest' operand of a jump/call. This allows the caller to resolve
   1.547 + *      jump/call targets stored in a register or on the stack, and also allows
   1.548 + *      the caller to prevent endless loops by checking if an address has
   1.549 + *      already been disassembled. If an address cannot be resolved from the
   1.550 + *      operand, or if the address has already been disassembled, this routine
   1.551 + *      should return -1; in all other cases the RVA to be disassembled next
   1.552 + *      should be returned. */
   1.553 +typedef int32_t (*DISASM_RESOLVER)( x86_op_t *op, x86_insn_t * current_insn,
   1.554 +				 void *arg );
   1.555 +
   1.556 +
   1.557 +/* x86_disasm: Disassemble a single instruction from a buffer of bytes.
   1.558 + *             Returns size of instruction in bytes.
   1.559 + *             Caller is responsible for calling x86_oplist_free() on
   1.560 + *             a reused "insn" to avoid leaking memory when calling this
   1.561 + *             function repeatedly.
   1.562 + *      buf     : Buffer of bytes to disassemble
   1.563 + *      buf_len : Length of the buffer
   1.564 + *      buf_rva : Load address of the start of the buffer
   1.565 + *      offset  : Offset in buffer to disassemble
   1.566 + *      insn    : Structure to fill with disassembled instruction
   1.567 + */
   1.568 +unsigned int x86_disasm( unsigned char *buf, unsigned int buf_len,
   1.569 +                	 uint32_t buf_rva, unsigned int offset,
   1.570 +                	 x86_insn_t * insn );
   1.571 +
   1.572 +/* x86_disasm_range: Sequential disassembly of a range of bytes in a buffer,
   1.573 + *                   invoking a callback function each time an instruction
   1.574 + *                   is successfully disassembled. The 'range' refers to the
   1.575 + *                   bytes between 'offset' and 'offset + len' in the buffer;
   1.576 + *                   'len' is assumed to be less than the length of the buffer.
   1.577 + *                   Returns number of instructions processed.
   1.578 + *      buf     : Buffer of bytes to disassemble (e.g. .text section)
   1.579 + *      buf_rva : Load address of buffer (e.g. ELF Virtual Address)
   1.580 + *      offset  : Offset in buffer to start disassembly at
   1.581 + *      len     : Number of bytes to disassemble
   1.582 + *      func    : Callback function to invoke (may be NULL)
   1.583 + *      arg     : Arbitrary data to pass to callback (may be NULL)
   1.584 + */
   1.585 +unsigned int x86_disasm_range( unsigned char *buf, uint32_t buf_rva,
   1.586 +	                       unsigned int offset, unsigned int len,
   1.587 +	                       DISASM_CALLBACK func, void *arg );
   1.588 +
   1.589 +/* x86_disasm_forward: Flow-of-execution disassembly of the bytes in a buffer,
   1.590 + *                     invoking a callback function each time an instruction
   1.591 + *                     is successfully disassembled.
   1.592 + *      buf     : Buffer to disassemble (e.g. .text section)
   1.593 + *      buf_len : Number of bytes in buffer
   1.594 + *      buf_rva : Load address of buffer (e.g. ELF Virtual Address)
   1.595 + *      offset  : Offset in buffer to start disassembly at (e.g. entry point)
   1.596 + *      func    : Callback function to invoke (may be NULL)
   1.597 + *      arg     : Arbitrary data to pass to callback (may be NULL)
   1.598 + *      resolver: Caller-supplied address resolver. If no resolver is
   1.599 + *                supplied, a default internal one is used -- however the
   1.600 + *                internal resolver does NOT catch loops and could end up
   1.601 + *                disassembling forever.
   1.602 + *      r_arg	: Arbitrary data to pass to resolver (may be NULL)
   1.603 + */
   1.604 +unsigned int x86_disasm_forward( unsigned char *buf, unsigned int buf_len,
   1.605 +	                         uint32_t buf_rva, unsigned int offset,
   1.606 +	                         DISASM_CALLBACK func, void *arg,
   1.607 +	                         DISASM_RESOLVER resolver, void *r_arg );
   1.608 +
   1.609 +/* Instruction operands: these are stored as a list of explicit and
   1.610 + * implicit operands. It is recommended that the 'foreach' routines
   1.611 + * be used when examining operands for purposes of data flow analysis */
   1.612 +
   1.613 +/* Operand FOREACH callback: 'arg' is an arbitrary parameter passed to the
   1.614 + * foreach routine, 'insn' is the x86_insn_t whose operands are being
   1.615 + * iterated over, and 'op' is the current x86_op_t */
   1.616 +typedef void (*x86_operand_fn)(x86_op_t *op, x86_insn_t *insn, void *arg);
   1.617 +
   1.618 +/* FOREACH types: these are used to limit the foreach results to
   1.619 + * operands which match a certain "type" (implicit or explicit)
   1.620 + * or which are accessed in certain ways (e.g. read or write). Note
   1.621 + * that this operates on the operand list of a single instruction, so
   1.622 + * specifying the 'real' operand type (register, memory, etc) is not
   1.623 + * useful. Note also that by definition Execute Access implies Read
   1.624 + * Access and implies Not Write Access.
   1.625 + * The "type" (implicit or explicit) and the access method can
   1.626 + * be ORed together, e.g. op_wo | op_explicit */
   1.627 +enum x86_op_foreach_type {
   1.628 +	op_any 	= 0,		/* ALL operands (explicit, implicit, rwx) */
   1.629 +	op_dest = 1,		/* operands with Write access */
   1.630 +	op_src 	= 2,		/* operands with Read access */
   1.631 +	op_ro 	= 3,		/* operands with Read but not Write access */
   1.632 +	op_wo 	= 4,		/* operands with Write but not Read access */
   1.633 +	op_xo 	= 5,		/* operands with Execute access */
   1.634 +	op_rw 	= 6,		/* operands with Read AND Write access */
   1.635 +	op_implicit = 0x10,	/* operands that are implied by the opcode */
   1.636 +	op_explicit = 0x20	/* operands that are not side-effects */
   1.637 +};
   1.638 +
   1.639 +
   1.640 +/* free the operand list associated with an instruction -- useful for
   1.641 + * preventing memory leaks when free()ing an x86_insn_t */
   1.642 +void x86_oplist_free( x86_insn_t *insn );
   1.643 +
   1.644 +/* Operand foreach: invokes 'func' with the current operand, 'insn' and 'arg'
   1.645 + * as arguments. The 'type' parameter is used to select only operands
   1.646 + * matching specific criteria. */
   1.647 +int x86_operand_foreach( x86_insn_t *insn, x86_operand_fn func, void *arg,
   1.648 +	       	  	 enum x86_op_foreach_type type);
   1.649 +
   1.650 +/* convenience routine: returns count of operands matching 'type' */
   1.651 +size_t x86_operand_count( x86_insn_t *insn, enum x86_op_foreach_type type );
   1.652 +
   1.653 +/* accessor functions for the operands */
   1.654 +x86_op_t * x86_operand_1st( x86_insn_t *insn );
   1.655 +x86_op_t * x86_operand_2nd( x86_insn_t *insn );
   1.656 +x86_op_t * x86_operand_3rd( x86_insn_t *insn );
   1.657 +
   1.658 +/* these allow libdisasm 2.0 accessor functions to still be used */
   1.659 +#define x86_get_dest_operand( insn ) x86_operand_1st( insn )
   1.660 +#define x86_get_src_operand( insn ) x86_operand_2nd( insn )
   1.661 +#define x86_get_imm_operand( insn ) x86_operand_3rd( insn )
   1.662 +
   1.663 +/* get size of operand data in bytes */
   1.664 +unsigned int x86_operand_size( x86_op_t *op );
   1.665 +
   1.666 +/* Operand Convenience Routines: the following routines are common
   1.667 + * operations on operands, intended to ease the burden of the programmer. */
   1.668 +
   1.669 +/* Get Address: return the value of an offset operand, or the offset of
   1.670 + * a segment:offset absolute address */
   1.671 +uint32_t x86_get_address( x86_insn_t *insn );
   1.672 +
   1.673 +/* Get Relative Offset: return as a sign-extended int32_t the near or far
   1.674 + * relative offset operand, or 0 if there is none. There can be only one
   1.675 + * relative offset operand in an instruction. */
   1.676 +int32_t x86_get_rel_offset( x86_insn_t *insn );
   1.677 +
   1.678 +/* Get Branch Target: return the x86_op_t containing the target of
   1.679 + * a jump or call operand, or NULL if there is no branch target. 
   1.680 + * Internally, a 'branch target' is defined as any operand with
   1.681 + * Execute Access set. There can be only one branch target per instruction. */
   1.682 +x86_op_t * x86_get_branch_target( x86_insn_t *insn );
   1.683 +
   1.684 +/* Get Immediate: return the x86_op_t containing the immediate operand
   1.685 + * for this instruction, or NULL if there is no immediate operand. There
   1.686 + * can be only one immediate operand per instruction */
   1.687 +x86_op_t * x86_get_imm( x86_insn_t *insn );
   1.688 +
   1.689 +/* Get Raw Immediate Data: returns a pointer to the immediate data encoded
   1.690 + * in the instruction. This is useful for large data types [>32 bits] currently
   1.691 + * not supported by libdisasm, or for determining if the disassembler
   1.692 + * mishandled the conversion of the immediate data. Note that 'imm' in this
   1.693 + * context refers to immediate data encoded at the end of an instruction as
   1.694 + * detailed in the Intel Manual Vol II Chapter 2; it does not refer to the
   1.695 + * 'op_imm' operand (the third operand in instructions like 'mul'). */
   1.696 +unsigned char * x86_get_raw_imm( x86_insn_t *insn );
   1.697 +
   1.698 +
   1.699 +/* More accessor functions, this time for user-defined info... */
   1.700 +/* set the address (usually RVA) of the insn */
   1.701 +void x86_set_insn_addr( x86_insn_t *insn, uint32_t addr );
   1.702 +
   1.703 +/* set the offset (usually offset into file) of the insn */
   1.704 +void x86_set_insn_offset( x86_insn_t *insn, unsigned int offset );
   1.705 +
   1.706 +/* set a pointer to the function owning the instruction. The 
   1.707 + * type of 'func' is user-defined; libdisasm does not use the func field. */
   1.708 +void x86_set_insn_function( x86_insn_t *insn, void * func );
   1.709 +
   1.710 +/* set a pointer to the block of code owning the instruction. The 
   1.711 + * type of 'block' is user-defined; libdisasm does not use the block field. */
   1.712 +void x86_set_insn_block( x86_insn_t *insn, void * block );
   1.713 +
   1.714 +/* instruction tagging: these routines allow the programmer to mark
   1.715 + * instructions as "seen" in a DFS, for example. libdisasm does not use
   1.716 + * the tag field.*/ 
   1.717 +/* set insn->tag to 1 */
   1.718 +void x86_tag_insn( x86_insn_t *insn );
   1.719 +/* set insn->tag to 0 */
   1.720 +void x86_untag_insn( x86_insn_t *insn );
   1.721 +/* return insn->tag */
   1.722 +int x86_insn_is_tagged( x86_insn_t *insn );
   1.723 +
   1.724 +
   1.725 +/* Disassembly formats:
   1.726 + *      AT&T is standard AS/GAS-style: "mnemonic\tsrc, dest, imm"
   1.727 + *      Intel is standard MASM/NASM/TASM: "mnemonic\tdest,src, imm"
   1.728 + *      Native is tab-delimited: "RVA\tbytes\tmnemonic\tdest\tsrc\timm"
   1.729 + *      XML is your typical <insn> ... </insn>
   1.730 + *      Raw is addr|offset|size|bytes|prefix... see libdisasm_formats.7
   1.731 + * NOTE(review): "header: N bytes" below presumably refers to x86_format_header() output -- confirm */
   1.732 +enum x86_asm_format { 
   1.733 +	unknown_syntax = 0,		/* never use! */
   1.734 +	native_syntax, 			/* header: 35 bytes */
   1.735 +	intel_syntax, 			/* header: 23 bytes */
   1.736 +	att_syntax,  			/* header: 23 bytes */
   1.737 +	xml_syntax,			/* header: 679 bytes */
   1.738 +	raw_syntax			/* header: 172 bytes */
   1.739 +};
   1.740 +
   1.741 +/* format (sprintf) an operand into 'buf' using specified syntax */
   1.742 +int x86_format_operand(x86_op_t *op, char *buf, int len,
   1.743 +                  enum x86_asm_format format);
   1.744 +
   1.745 +/* format (sprintf) an instruction mnemonic into 'buf' using specified syntax */
   1.746 +int x86_format_mnemonic(x86_insn_t *insn, char *buf, int len,
   1.747 +                        enum x86_asm_format format);
   1.748 +
   1.749 +/* format (sprintf) an instruction into 'buf' (capacity 'len') using the
   1.750 + * syntax selected by 'format'; this includes formatting all operands */
   1.751 +int x86_format_insn(x86_insn_t *insn, char *buf, int len, enum x86_asm_format format);
   1.752 +
   1.753 +/* fill 'buf' with a description of the format's syntax */
   1.754 +int x86_format_header( char *buf, int len, enum x86_asm_format format);
   1.755 +
   1.756 +/* Endianness of an x86 CPU : 0 is big, 1 is little; always returns 1 */
   1.757 +unsigned int x86_endian(void);
   1.758 +
   1.759 +/* Default address and operand size in bytes */
   1.760 +unsigned int x86_addr_size(void);
   1.761 +unsigned int x86_op_size(void);
   1.762 +
   1.763 +/* Size of a machine word in bytes */
   1.764 +unsigned int x86_word_size(void);
   1.765 +
   1.766 +/* maximum size of a code instruction; x86_max_inst_size() is a macro alias for x86_max_insn_size() */
   1.767 +#define x86_max_inst_size(x) x86_max_insn_size(x)
   1.768 +unsigned int x86_max_insn_size(void);
   1.769 +
   1.770 +/* register IDs of Stack, Frame, Instruction pointer and Flags register */
   1.771 +unsigned int x86_sp_reg(void);
   1.772 +unsigned int x86_fp_reg(void);
   1.773 +unsigned int x86_ip_reg(void);
   1.774 +unsigned int x86_flag_reg(void);
   1.775 +
   1.776 +/* fill 'reg' struct with details of register 'id' */
   1.777 +void x86_reg_from_id( unsigned int id, x86_reg_t * reg );
   1.778 +
   1.779 +/* convenience macro demonstrating how to get an aliased register; proto is
   1.780 + *   void x86_get_aliased_reg( x86_reg_t *alias_reg, x86_reg_t *output_reg )
   1.781 + * where 'alias_reg' is a reg operand and 'output_reg' is filled with the
   1.782 + * register it aliases; arguments are parenthesized so expressions are safe */
   1.783 +#define x86_get_aliased_reg( alias_reg, output_reg )			\
   1.784 +	x86_reg_from_id( (alias_reg)->alias, (output_reg) )
   1.785 +
   1.786 +
   1.787 +/* ================================== Invariant Instruction Representation */
   1.788 +/* Invariant instructions are used for generating binary signatures; 
   1.789 + * the instruction is modified so that all variant bytes in an instruction
   1.790 + * are replaced with a wildcard byte. 
   1.791 + *
   1.792 + * A 'variant byte' is one that is expected to be modified by either the 
   1.793 + * static or the dynamic linker: for example, an address encoded in an 
   1.794 + * instruction. 
   1.795 + *
   1.796 + * By comparing the invariant representation of one instruction [or of a
   1.797 + * sequence of instructions] with the invariant representation of another,
   1.798 + * one can determine whether the two invariant representations are from the same
   1.799 + * relocatable object [.o] file. Thus one can use binary signatures [which
   1.800 + * are just sequences of invariant instruction representations] to look for
   1.801 + * library routines which have been statically-linked into a binary.
   1.802 + *
   1.803 + * The invariant routines are faster and smaller than the disassembly
   1.804 + * routines; they can be used to determine the size of an instruction 
   1.805 + * without all of the overhead of a full instruction disassembly.
   1.806 + */
   1.807 +
   1.808 +/* This byte is used to replace variant bytes */
   1.809 +#define X86_WILDCARD_BYTE 0xF4
   1.810 +
   1.811 +typedef struct {
   1.812 +        enum x86_op_type        type;           /* operand type */
   1.813 +        enum x86_op_datatype    datatype;       /* operand size */
   1.814 +        enum x86_op_access      access;         /* operand access [RWX] */
   1.815 +        enum x86_op_flags       flags;          /* misc flags */
   1.816 +} x86_invariant_op_t;
   1.817 +
   1.818 +typedef struct {
   1.819 +	unsigned char bytes[64];	/* insn bytes, variant ones wildcarded */
   1.820 +	unsigned int  size;		/* number of bytes in insn */
   1.821 +        enum x86_insn_group group;      /* meta-type, e.g. INS_EXEC */
   1.822 +        enum x86_insn_type type;        /* type, e.g. INS_BRANCH */
   1.823 +	x86_invariant_op_t operands[3];	/* operands: dest, src, imm */
   1.824 +} x86_invariant_t;
   1.825 + 
   1.826 +
   1.827 +/* store in 'inv' a version of the instruction with the variant bytes masked out */
   1.828 +size_t x86_invariant_disasm( unsigned char *buf, int buf_len, 
   1.829 +			  x86_invariant_t *inv );
   1.830 +/* return the size in bytes of the instruction pointed to by 'buf';
   1.831 + * this uses x86_invariant_disasm since it is faster than x86_disasm */
   1.832 +size_t x86_size_disasm( unsigned char *buf, unsigned int buf_len );
   1.833 +
   1.834 +#ifdef __cplusplus
   1.835 +}
   1.836 +#endif
   1.837 +
   1.838 +
   1.839 +#endif

mercurial