michael@0: #include michael@0: #include michael@0: #include michael@0: michael@0: #include "libdis.h" michael@0: #include "ia32_insn.h" michael@0: #include "ia32_operand.h" michael@0: #include "ia32_modrm.h" michael@0: #include "ia32_reg.h" michael@0: #include "x86_imm.h" michael@0: #include "x86_operand_list.h" michael@0: michael@0: michael@0: michael@0: /* apply segment override to memory operand in insn */ michael@0: static void apply_seg( x86_op_t *op, unsigned int prefixes ) { michael@0: if (! prefixes ) return; michael@0: michael@0: /* apply overrides from prefix */ michael@0: switch ( prefixes & PREFIX_REG_MASK ) { michael@0: case PREFIX_CS: michael@0: op->flags |= op_cs_seg; break; michael@0: case PREFIX_SS: michael@0: op->flags |= op_ss_seg; break; michael@0: case PREFIX_DS: michael@0: op->flags |= op_ds_seg; break; michael@0: case PREFIX_ES: michael@0: op->flags |= op_es_seg; break; michael@0: case PREFIX_FS: michael@0: op->flags |= op_fs_seg; break; michael@0: case PREFIX_GS: michael@0: op->flags |= op_gs_seg; break; michael@0: } michael@0: michael@0: return; michael@0: } michael@0: michael@0: static size_t decode_operand_value( unsigned char *buf, size_t buf_len, michael@0: x86_op_t *op, x86_insn_t *insn, michael@0: unsigned int addr_meth, size_t op_size, michael@0: unsigned int op_value, unsigned char modrm, michael@0: size_t gen_regs ) { michael@0: size_t size = 0; michael@0: michael@0: /* ++ Do Operand Addressing Method / Decode operand ++ */ michael@0: switch (addr_meth) { michael@0: /* This sets the operand Size based on the Intel Opcode Map michael@0: * (Vol 2, Appendix A). Letter encodings are from section michael@0: * A.1.1, 'Codes for Addressing Method' */ michael@0: michael@0: /* ---------------------- Addressing Method -------------- */ michael@0: /* Note that decoding mod ModR/M operand adjusts the size of michael@0: * the instruction, but decoding the reg operand does not. michael@0: * This should not cause any problems, as every 'reg' operand michael@0: * has an associated 'mod' operand. michael@0: * Goddamn-Intel-Note: michael@0: * Some Intel addressing methods [M, R] specify that modR/M michael@0: * byte may only refer to a memory address/may only refer to michael@0: * a register -- however Intel provides no clues on what to do michael@0: * if, say, the modR/M for an M opcode decodes to a register michael@0: * rather than a memory address ... returning 0 is out of the michael@0: * question, as this would be an Immediate or a RelOffset, so michael@0: * instead these modR/Ms are decoded with total disregard to michael@0: * the M, R constraints. */ michael@0: michael@0: /* MODRM -- mod operand. sets size to at least 1! */ michael@0: case ADDRMETH_E: /* ModR/M present, Gen reg or memory */ michael@0: size = ia32_modrm_decode( buf, buf_len, op, insn, michael@0: gen_regs ); michael@0: break; michael@0: case ADDRMETH_M: /* ModR/M only refers to memory */ michael@0: size = ia32_modrm_decode( buf, buf_len, op, insn, michael@0: gen_regs ); michael@0: break; michael@0: case ADDRMETH_Q: /* ModR/M present, MMX or Memory */ michael@0: size = ia32_modrm_decode( buf, buf_len, op, insn, michael@0: REG_MMX_OFFSET ); michael@0: break; michael@0: case ADDRMETH_R: /* ModR/M mod == gen reg */ michael@0: size = ia32_modrm_decode( buf, buf_len, op, insn, michael@0: gen_regs ); michael@0: break; michael@0: case ADDRMETH_W: /* ModR/M present, mem or SIMD reg */ michael@0: size = ia32_modrm_decode( buf, buf_len, op, insn, michael@0: REG_SIMD_OFFSET ); michael@0: break; michael@0: michael@0: /* MODRM -- reg operand. does not effect size! */ michael@0: case ADDRMETH_C: /* ModR/M reg == control reg */ michael@0: ia32_reg_decode( modrm, op, REG_CTRL_OFFSET ); michael@0: break; michael@0: case ADDRMETH_D: /* ModR/M reg == debug reg */ michael@0: ia32_reg_decode( modrm, op, REG_DEBUG_OFFSET ); michael@0: break; michael@0: case ADDRMETH_G: /* ModR/M reg == gen-purpose reg */ michael@0: ia32_reg_decode( modrm, op, gen_regs ); michael@0: break; michael@0: case ADDRMETH_P: /* ModR/M reg == qword MMX reg */ michael@0: ia32_reg_decode( modrm, op, REG_MMX_OFFSET ); michael@0: break; michael@0: case ADDRMETH_S: /* ModR/M reg == segment reg */ michael@0: ia32_reg_decode( modrm, op, REG_SEG_OFFSET ); michael@0: break; michael@0: case ADDRMETH_T: /* ModR/M reg == test reg */ michael@0: ia32_reg_decode( modrm, op, REG_TEST_OFFSET ); michael@0: break; michael@0: case ADDRMETH_V: /* ModR/M reg == SIMD reg */ michael@0: ia32_reg_decode( modrm, op, REG_SIMD_OFFSET ); michael@0: break; michael@0: michael@0: /* No MODRM : note these set operand type explicitly */ michael@0: case ADDRMETH_A: /* No modR/M -- direct addr */ michael@0: op->type = op_absolute; michael@0: michael@0: /* segment:offset address used in far calls */ michael@0: x86_imm_sized( buf, buf_len, michael@0: &op->data.absolute.segment, 2 ); michael@0: if ( insn->addr_size == 4 ) { michael@0: x86_imm_sized( buf, buf_len, michael@0: &op->data.absolute.offset.off32, 4 ); michael@0: size = 6; michael@0: } else { michael@0: x86_imm_sized( buf, buf_len, michael@0: &op->data.absolute.offset.off16, 2 ); michael@0: size = 4; michael@0: } michael@0: michael@0: break; michael@0: case ADDRMETH_I: /* Immediate val */ michael@0: op->type = op_immediate; michael@0: /* if it ever becomes legal to have imm as dest and michael@0: * there is a src ModR/M operand, we are screwed! */ michael@0: if ( op->flags & op_signed ) { michael@0: x86_imm_signsized(buf, buf_len, &op->data.byte, michael@0: op_size); michael@0: } else { michael@0: x86_imm_sized(buf, buf_len, &op->data.byte, michael@0: op_size); michael@0: } michael@0: size = op_size; michael@0: break; michael@0: case ADDRMETH_J: /* Rel offset to add to IP [jmp] */ michael@0: /* this fills op->data.near_offset or michael@0: op->data.far_offset depending on the size of michael@0: the operand */ michael@0: op->flags |= op_signed; michael@0: if ( op_size == 1 ) { michael@0: /* one-byte near offset */ michael@0: op->type = op_relative_near; michael@0: x86_imm_signsized(buf, buf_len, michael@0: &op->data.relative_near, 1); michael@0: } else { michael@0: /* far offset...is this truly signed? */ michael@0: op->type = op_relative_far; michael@0: x86_imm_signsized(buf, buf_len, michael@0: &op->data.relative_far, op_size ); michael@0: } michael@0: size = op_size; michael@0: break; michael@0: case ADDRMETH_O: /* No ModR/M; op is word/dword offset */ michael@0: /* NOTE: these are actually RVAs not offsets to seg!! */ michael@0: /* note bene: 'O' ADDR_METH uses addr_size to michael@0: determine operand size */ michael@0: op->type = op_offset; michael@0: op->flags |= op_pointer; michael@0: x86_imm_sized( buf, buf_len, &op->data.offset, michael@0: insn->addr_size ); michael@0: michael@0: size = insn->addr_size; michael@0: break; michael@0: michael@0: /* Hard-coded: these are specified in the insn definition */ michael@0: case ADDRMETH_F: /* EFLAGS register */ michael@0: op->type = op_register; michael@0: op->flags |= op_hardcode; michael@0: ia32_handle_register( &op->data.reg, REG_FLAGS_INDEX ); michael@0: break; michael@0: case ADDRMETH_X: /* Memory addressed by DS:SI [string] */ michael@0: op->type = op_expression; michael@0: op->flags |= op_hardcode; michael@0: op->flags |= op_ds_seg | op_pointer | op_string; michael@0: ia32_handle_register( &op->data.expression.base, michael@0: REG_DWORD_OFFSET + 6 ); michael@0: break; michael@0: case ADDRMETH_Y: /* Memory addressed by ES:DI [string] */ michael@0: op->type = op_expression; michael@0: op->flags |= op_hardcode; michael@0: op->flags |= op_es_seg | op_pointer | op_string; michael@0: ia32_handle_register( &op->data.expression.base, michael@0: REG_DWORD_OFFSET + 7 ); michael@0: break; michael@0: case ADDRMETH_RR: /* Gen Register hard-coded in opcode */ michael@0: op->type = op_register; michael@0: op->flags |= op_hardcode; michael@0: ia32_handle_register( &op->data.reg, michael@0: op_value + gen_regs ); michael@0: break; michael@0: case ADDRMETH_RS: /* Seg Register hard-coded in opcode */ michael@0: op->type = op_register; michael@0: op->flags |= op_hardcode; michael@0: ia32_handle_register( &op->data.reg, michael@0: op_value + REG_SEG_OFFSET ); michael@0: break; michael@0: case ADDRMETH_RF: /* FPU Register hard-coded in opcode */ michael@0: op->type = op_register; michael@0: op->flags |= op_hardcode; michael@0: ia32_handle_register( &op->data.reg, michael@0: op_value + REG_FPU_OFFSET ); michael@0: break; michael@0: case ADDRMETH_RT: /* TST Register hard-coded in opcode */ michael@0: op->type = op_register; michael@0: op->flags |= op_hardcode; michael@0: ia32_handle_register( &op->data.reg, michael@0: op_value + REG_TEST_OFFSET ); michael@0: break; michael@0: case ADDRMETH_II: /* Immediate hard-coded in opcode */ michael@0: op->type = op_immediate; michael@0: op->data.dword = op_value; michael@0: op->flags |= op_hardcode; michael@0: break; michael@0: michael@0: case 0: /* Operand is not used */ michael@0: default: michael@0: /* ignore -- operand not used in this insn */ michael@0: op->type = op_unused; /* this shouldn't happen! */ michael@0: break; michael@0: } michael@0: michael@0: return size; michael@0: } michael@0: michael@0: static size_t decode_operand_size( unsigned int op_type, x86_insn_t *insn, michael@0: x86_op_t *op ){ michael@0: size_t size; michael@0: michael@0: /* ++ Do Operand Type ++ */ michael@0: switch (op_type) { michael@0: /* This sets the operand Size based on the Intel Opcode Map michael@0: * (Vol 2, Appendix A). Letter encodings are from section michael@0: * A.1.2, 'Codes for Operand Type' */ michael@0: /* NOTE: in this routines, 'size' refers to the size michael@0: * of the operand in the raw (encoded) instruction; michael@0: * 'datatype' stores the actual size and datatype michael@0: * of the operand */ michael@0: michael@0: /* ------------------------ Operand Type ----------------- */ michael@0: case OPTYPE_c: /* byte or word [op size attr] */ michael@0: size = (insn->op_size == 4) ? 2 : 1; michael@0: op->datatype = (size == 4) ? op_word : op_byte; michael@0: break; michael@0: case OPTYPE_a: /* 2 word or 2 dword [op size attr] */ michael@0: /* pointer to a 16:16 or 32:32 BOUNDS operand */ michael@0: size = (insn->op_size == 4) ? 8 : 4; michael@0: op->datatype = (size == 4) ? op_bounds32 : op_bounds16; michael@0: break; michael@0: case OPTYPE_v: /* word or dword [op size attr] */ michael@0: size = (insn->op_size == 4) ? 4 : 2; michael@0: op->datatype = (size == 4) ? op_dword : op_word; michael@0: break; michael@0: case OPTYPE_p: /* 32/48-bit ptr [op size attr] */ michael@0: /* technically these flags are not accurate: the michael@0: * value s a 16:16 pointer or a 16:32 pointer, where michael@0: * the first '16' is a segment */ michael@0: size = (insn->addr_size == 4) ? 6 : 4; michael@0: op->datatype = (size == 4) ? op_descr32 : op_descr16; michael@0: break; michael@0: case OPTYPE_b: /* byte, ignore op-size */ michael@0: size = 1; michael@0: op->datatype = op_byte; michael@0: break; michael@0: case OPTYPE_w: /* word, ignore op-size */ michael@0: size = 2; michael@0: op->datatype = op_word; michael@0: break; michael@0: case OPTYPE_d: /* dword , ignore op-size */ michael@0: size = 4; michael@0: op->datatype = op_dword; michael@0: break; michael@0: case OPTYPE_s: /* 6-byte psuedo-descriptor */ michael@0: /* ptr to 6-byte value which is 32:16 in 32-bit michael@0: * mode, or 8:24:16 in 16-bit mode. The high byte michael@0: * is ignored in 16-bit mode. */ michael@0: size = 6; michael@0: op->datatype = (insn->addr_size == 4) ? michael@0: op_pdescr32 : op_pdescr16; michael@0: break; michael@0: case OPTYPE_q: /* qword, ignore op-size */ michael@0: size = 8; michael@0: op->datatype = op_qword; michael@0: break; michael@0: case OPTYPE_dq: /* d-qword, ignore op-size */ michael@0: size = 16; michael@0: op->datatype = op_dqword; michael@0: break; michael@0: case OPTYPE_ps: /* 128-bit FP data */ michael@0: size = 16; michael@0: /* really this is 4 packed SP FP values */ michael@0: op->datatype = op_ssimd; michael@0: break; michael@0: case OPTYPE_pd: /* 128-bit FP data */ michael@0: size = 16; michael@0: /* really this is 2 packed DP FP values */ michael@0: op->datatype = op_dsimd; michael@0: break; michael@0: case OPTYPE_ss: /* Scalar elem of 128-bit FP data */ michael@0: size = 16; michael@0: /* this only looks at the low dword (4 bytes) michael@0: * of the xmmm register passed as a param. michael@0: * This is a 16-byte register where only 4 bytes michael@0: * are used in the insn. Painful, ain't it? */ michael@0: op->datatype = op_sssimd; michael@0: break; michael@0: case OPTYPE_sd: /* Scalar elem of 128-bit FP data */ michael@0: size = 16; michael@0: /* this only looks at the low qword (8 bytes) michael@0: * of the xmmm register passed as a param. michael@0: * This is a 16-byte register where only 8 bytes michael@0: * are used in the insn. Painful, again... */ michael@0: op->datatype = op_sdsimd; michael@0: break; michael@0: case OPTYPE_pi: /* qword mmx register */ michael@0: size = 8; michael@0: op->datatype = op_qword; michael@0: break; michael@0: case OPTYPE_si: /* dword integer register */ michael@0: size = 4; michael@0: op->datatype = op_dword; michael@0: break; michael@0: case OPTYPE_fs: /* single-real */ michael@0: size = 4; michael@0: op->datatype = op_sreal; michael@0: break; michael@0: case OPTYPE_fd: /* double real */ michael@0: size = 8; michael@0: op->datatype = op_dreal; michael@0: break; michael@0: case OPTYPE_fe: /* extended real */ michael@0: size = 10; michael@0: op->datatype = op_extreal; michael@0: break; michael@0: case OPTYPE_fb: /* packed BCD */ michael@0: size = 10; michael@0: op->datatype = op_bcd; michael@0: break; michael@0: case OPTYPE_fv: /* pointer to FPU env: 14 or 28-bytes */ michael@0: size = (insn->addr_size == 4)? 28 : 14; michael@0: op->datatype = (size == 28)? op_fpuenv32: op_fpuenv16; michael@0: break; michael@0: case OPTYPE_ft: /* pointer to FPU env: 94 or 108 bytes */ michael@0: size = (insn->addr_size == 4)? 108 : 94; michael@0: op->datatype = (size == 108)? michael@0: op_fpustate32: op_fpustate16; michael@0: break; michael@0: case OPTYPE_fx: /* 512-byte register stack */ michael@0: size = 512; michael@0: op->datatype = op_fpregset; michael@0: break; michael@0: case OPTYPE_fp: /* floating point register */ michael@0: size = 10; /* double extended precision */ michael@0: op->datatype = op_fpreg; michael@0: break; michael@0: case OPTYPE_m: /* fake operand type used for "lea Gv, M" */ michael@0: size = insn->addr_size; michael@0: op->datatype = (size == 4) ? op_dword : op_word; michael@0: break; michael@0: case OPTYPE_none: /* handle weird instructions that have no encoding but use a dword datatype, like invlpg */ michael@0: size = 0; michael@0: op->datatype = op_none; michael@0: break; michael@0: case 0: michael@0: default: michael@0: size = insn->op_size; michael@0: op->datatype = (size == 4) ? op_dword : op_word; michael@0: break; michael@0: } michael@0: return size; michael@0: } michael@0: michael@0: size_t ia32_decode_operand( unsigned char *buf, size_t buf_len, michael@0: x86_insn_t *insn, unsigned int raw_op, michael@0: unsigned int raw_flags, unsigned int prefixes, michael@0: unsigned char modrm ) { michael@0: unsigned int addr_meth, op_type, op_size, gen_regs; michael@0: x86_op_t *op; michael@0: size_t size; michael@0: michael@0: /* ++ Yank optype and addr mode out of operand flags */ michael@0: addr_meth = raw_flags & ADDRMETH_MASK; michael@0: op_type = raw_flags & OPTYPE_MASK; michael@0: michael@0: if ( raw_flags == ARG_NONE ) { michael@0: /* operand is not used in this instruction */ michael@0: return 0; michael@0: } michael@0: michael@0: /* allocate a new operand */ michael@0: op = x86_operand_new( insn ); michael@0: michael@0: /* ++ Copy flags from opcode table to x86_insn_t */ michael@0: op->access = (enum x86_op_access) OP_PERM(raw_flags); michael@0: op->flags = (enum x86_op_flags) (OP_FLAGS(raw_flags) >> 12); michael@0: michael@0: /* Get size (for decoding) and datatype of operand */ michael@0: op_size = decode_operand_size(op_type, insn, op); michael@0: michael@0: /* override default register set based on Operand Type */ michael@0: /* this allows mixing of 8, 16, and 32 bit regs in insn */ michael@0: if (op_size == 1) { michael@0: gen_regs = REG_BYTE_OFFSET; michael@0: } else if (op_size == 2) { michael@0: gen_regs = REG_WORD_OFFSET; michael@0: } else { michael@0: gen_regs = REG_DWORD_OFFSET; michael@0: } michael@0: michael@0: size = decode_operand_value( buf, buf_len, op, insn, addr_meth, michael@0: op_size, raw_op, modrm, gen_regs ); michael@0: michael@0: /* if operand is an address, apply any segment override prefixes */ michael@0: if ( op->type == op_expression || op->type == op_offset ) { michael@0: apply_seg(op, prefixes); michael@0: } michael@0: michael@0: return size; /* return number of bytes in instruction */ michael@0: }