Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purpose.
michael@0 | 1 | #include <stdio.h> |
michael@0 | 2 | #include <stdlib.h> |
michael@0 | 3 | #include <string.h> |
michael@0 | 4 | #include "qword.h" |
michael@0 | 5 | |
michael@0 | 6 | #include "ia32_insn.h" |
michael@0 | 7 | #include "ia32_opcode_tables.h" |
michael@0 | 8 | |
michael@0 | 9 | #include "ia32_reg.h" |
michael@0 | 10 | #include "ia32_operand.h" |
michael@0 | 11 | #include "ia32_implicit.h" |
michael@0 | 12 | #include "ia32_settings.h" |
michael@0 | 13 | |
michael@0 | 14 | #include "libdis.h" |
michael@0 | 15 | |
michael@0 | 16 | extern ia32_table_desc_t ia32_tables[]; |
michael@0 | 17 | extern ia32_settings_t ia32_settings; |
michael@0 | 18 | |
/* True when `op` (a pointer to an operand) is a register operand whose
 * register id or alias is the stack pointer.  The argument is
 * parenthesized so that expressions such as `&operand` expand correctly. */
#define IS_SP( op )  ((op)->type == op_register &&	\
		((op)->data.reg.id == REG_ESP_INDEX ||	\
		 (op)->data.reg.alias == REG_ESP_INDEX) )
/* True when `op` (a pointer to an operand) is an immediate operand. */
#define IS_IMM( op ) ((op)->type == op_immediate )

/* INLINE expands to nothing when WIN32 is defined, to `inline` otherwise. */
#ifdef WIN32
#  define INLINE
#else
#  define INLINE	inline
#endif
michael@0 | 29 | |
michael@0 | 30 | /* for calculating stack modification based on an operand */ |
michael@0 | 31 | static INLINE int32_t long_from_operand( x86_op_t *op ) { |
michael@0 | 32 | |
michael@0 | 33 | if (! IS_IMM(op) ) { |
michael@0 | 34 | return 0L; |
michael@0 | 35 | } |
michael@0 | 36 | |
michael@0 | 37 | switch ( op->datatype ) { |
michael@0 | 38 | case op_byte: |
michael@0 | 39 | return (int32_t) op->data.sbyte; |
michael@0 | 40 | case op_word: |
michael@0 | 41 | return (int32_t) op->data.sword; |
michael@0 | 42 | case op_qword: |
michael@0 | 43 | return (int32_t) op->data.sqword; |
michael@0 | 44 | case op_dword: |
michael@0 | 45 | return op->data.sdword; |
michael@0 | 46 | default: |
michael@0 | 47 | /* these are not used in stack insn */ |
michael@0 | 48 | break; |
michael@0 | 49 | } |
michael@0 | 50 | |
michael@0 | 51 | return 0L; |
michael@0 | 52 | } |
michael@0 | 53 | |
michael@0 | 54 | |
michael@0 | 55 | /* determine what this insn does to the stack */ |
michael@0 | 56 | static void ia32_stack_mod(x86_insn_t *insn) { |
michael@0 | 57 | x86_op_t *dest, *src = NULL; |
michael@0 | 58 | |
michael@0 | 59 | if (! insn || ! insn->operands ) { |
michael@0 | 60 | return; |
michael@0 | 61 | } |
michael@0 | 62 | |
michael@0 | 63 | dest = &insn->operands->op; |
michael@0 | 64 | if ( dest ) { |
michael@0 | 65 | src = &insn->operands->next->op; |
michael@0 | 66 | } |
michael@0 | 67 | |
michael@0 | 68 | insn->stack_mod = 0; |
michael@0 | 69 | insn->stack_mod_val = 0; |
michael@0 | 70 | |
michael@0 | 71 | switch ( insn->type ) { |
michael@0 | 72 | case insn_call: |
michael@0 | 73 | case insn_callcc: |
michael@0 | 74 | insn->stack_mod = 1; |
michael@0 | 75 | insn->stack_mod_val = insn->addr_size * -1; |
michael@0 | 76 | break; |
michael@0 | 77 | case insn_push: |
michael@0 | 78 | insn->stack_mod = 1; |
michael@0 | 79 | insn->stack_mod_val = insn->addr_size * -1; |
michael@0 | 80 | break; |
michael@0 | 81 | case insn_return: |
michael@0 | 82 | insn->stack_mod = 1; |
michael@0 | 83 | insn->stack_mod_val = insn->addr_size; |
michael@0 | 84 | case insn_int: case insn_intcc: |
michael@0 | 85 | case insn_iret: |
michael@0 | 86 | break; |
michael@0 | 87 | case insn_pop: |
michael@0 | 88 | insn->stack_mod = 1; |
michael@0 | 89 | if (! IS_SP( dest ) ) { |
michael@0 | 90 | insn->stack_mod_val = insn->op_size; |
michael@0 | 91 | } /* else we don't know the stack change in a pop esp */ |
michael@0 | 92 | break; |
michael@0 | 93 | case insn_enter: |
michael@0 | 94 | insn->stack_mod = 1; |
michael@0 | 95 | insn->stack_mod_val = 0; /* TODO : FIX */ |
michael@0 | 96 | break; |
michael@0 | 97 | case insn_leave: |
michael@0 | 98 | insn->stack_mod = 1; |
michael@0 | 99 | insn->stack_mod_val = 0; /* TODO : FIX */ |
michael@0 | 100 | break; |
michael@0 | 101 | case insn_pushregs: |
michael@0 | 102 | insn->stack_mod = 1; |
michael@0 | 103 | insn->stack_mod_val = 0; /* TODO : FIX */ |
michael@0 | 104 | break; |
michael@0 | 105 | case insn_popregs: |
michael@0 | 106 | insn->stack_mod = 1; |
michael@0 | 107 | insn->stack_mod_val = 0; /* TODO : FIX */ |
michael@0 | 108 | break; |
michael@0 | 109 | case insn_pushflags: |
michael@0 | 110 | insn->stack_mod = 1; |
michael@0 | 111 | insn->stack_mod_val = 0; /* TODO : FIX */ |
michael@0 | 112 | break; |
michael@0 | 113 | case insn_popflags: |
michael@0 | 114 | insn->stack_mod = 1; |
michael@0 | 115 | insn->stack_mod_val = 0; /* TODO : FIX */ |
michael@0 | 116 | break; |
michael@0 | 117 | case insn_add: |
michael@0 | 118 | if ( IS_SP( dest ) ) { |
michael@0 | 119 | insn->stack_mod = 1; |
michael@0 | 120 | insn->stack_mod_val = long_from_operand( src ); |
michael@0 | 121 | } |
michael@0 | 122 | break; |
michael@0 | 123 | case insn_sub: |
michael@0 | 124 | if ( IS_SP( dest ) ) { |
michael@0 | 125 | insn->stack_mod = 1; |
michael@0 | 126 | insn->stack_mod_val = long_from_operand( src ); |
michael@0 | 127 | insn->stack_mod_val *= -1; |
michael@0 | 128 | } |
michael@0 | 129 | break; |
michael@0 | 130 | case insn_inc: |
michael@0 | 131 | if ( IS_SP( dest ) ) { |
michael@0 | 132 | insn->stack_mod = 1; |
michael@0 | 133 | insn->stack_mod_val = 1; |
michael@0 | 134 | } |
michael@0 | 135 | break; |
michael@0 | 136 | case insn_dec: |
michael@0 | 137 | if ( IS_SP( dest ) ) { |
michael@0 | 138 | insn->stack_mod = 1; |
michael@0 | 139 | insn->stack_mod_val = 1; |
michael@0 | 140 | } |
michael@0 | 141 | break; |
michael@0 | 142 | case insn_mov: case insn_movcc: |
michael@0 | 143 | case insn_xchg: case insn_xchgcc: |
michael@0 | 144 | case insn_mul: case insn_div: |
michael@0 | 145 | case insn_shl: case insn_shr: |
michael@0 | 146 | case insn_rol: case insn_ror: |
michael@0 | 147 | case insn_and: case insn_or: |
michael@0 | 148 | case insn_not: case insn_neg: |
michael@0 | 149 | case insn_xor: |
michael@0 | 150 | if ( IS_SP( dest ) ) { |
michael@0 | 151 | insn->stack_mod = 1; |
michael@0 | 152 | } |
michael@0 | 153 | break; |
michael@0 | 154 | default: |
michael@0 | 155 | break; |
michael@0 | 156 | } |
michael@0 | 157 | if (! strcmp("enter", insn->mnemonic) ) { |
michael@0 | 158 | insn->stack_mod = 1; |
michael@0 | 159 | } else if (! strcmp("leave", insn->mnemonic) ) { |
michael@0 | 160 | insn->stack_mod = 1; |
michael@0 | 161 | } |
michael@0 | 162 | |
michael@0 | 163 | /* for mov, etc we return 0 -- unknown stack mod */ |
michael@0 | 164 | |
michael@0 | 165 | return; |
michael@0 | 166 | } |
michael@0 | 167 | |
michael@0 | 168 | /* get the cpu details for this insn from cpu flags int */ |
michael@0 | 169 | static void ia32_handle_cpu( x86_insn_t *insn, unsigned int cpu ) { |
michael@0 | 170 | insn->cpu = (enum x86_insn_cpu) CPU_MODEL(cpu); |
michael@0 | 171 | insn->isa = (enum x86_insn_isa) (ISA_SUBSET(cpu)) >> 16; |
michael@0 | 172 | return; |
michael@0 | 173 | } |
michael@0 | 174 | |
michael@0 | 175 | /* handle mnemonic type and group */ |
michael@0 | 176 | static void ia32_handle_mnemtype(x86_insn_t *insn, unsigned int mnemtype) { |
michael@0 | 177 | unsigned int type = mnemtype & ~INS_FLAG_MASK; |
michael@0 | 178 | insn->group = (enum x86_insn_group) (INS_GROUP(type)) >> 12; |
michael@0 | 179 | insn->type = (enum x86_insn_type) INS_TYPE(type); |
michael@0 | 180 | |
michael@0 | 181 | return; |
michael@0 | 182 | } |
michael@0 | 183 | |
michael@0 | 184 | static void ia32_handle_notes(x86_insn_t *insn, unsigned int notes) { |
michael@0 | 185 | insn->note = (enum x86_insn_note) notes; |
michael@0 | 186 | return; |
michael@0 | 187 | } |
michael@0 | 188 | |
michael@0 | 189 | static void ia32_handle_eflags( x86_insn_t *insn, unsigned int eflags) { |
michael@0 | 190 | unsigned int flags; |
michael@0 | 191 | |
michael@0 | 192 | /* handle flags effected */ |
michael@0 | 193 | flags = INS_FLAGS_TEST(eflags); |
michael@0 | 194 | /* handle weird OR cases */ |
michael@0 | 195 | /* these are either JLE (ZF | SF<>OF) or JBE (CF | ZF) */ |
michael@0 | 196 | if (flags & INS_TEST_OR) { |
michael@0 | 197 | flags &= ~INS_TEST_OR; |
michael@0 | 198 | if ( flags & INS_TEST_ZERO ) { |
michael@0 | 199 | flags &= ~INS_TEST_ZERO; |
michael@0 | 200 | if ( flags & INS_TEST_CARRY ) { |
michael@0 | 201 | flags &= ~INS_TEST_CARRY ; |
michael@0 | 202 | flags |= (int)insn_carry_or_zero_set; |
michael@0 | 203 | } else if ( flags & INS_TEST_SFNEOF ) { |
michael@0 | 204 | flags &= ~INS_TEST_SFNEOF; |
michael@0 | 205 | flags |= (int)insn_zero_set_or_sign_ne_oflow; |
michael@0 | 206 | } |
michael@0 | 207 | } |
michael@0 | 208 | } |
michael@0 | 209 | insn->flags_tested = (enum x86_flag_status) flags; |
michael@0 | 210 | |
michael@0 | 211 | insn->flags_set = (enum x86_flag_status) INS_FLAGS_SET(eflags) >> 16; |
michael@0 | 212 | |
michael@0 | 213 | return; |
michael@0 | 214 | } |
michael@0 | 215 | |
michael@0 | 216 | static void ia32_handle_prefix( x86_insn_t *insn, unsigned int prefixes ) { |
michael@0 | 217 | |
michael@0 | 218 | insn->prefix = (enum x86_insn_prefix) prefixes & PREFIX_MASK; // >> 20; |
michael@0 | 219 | if (! (insn->prefix & PREFIX_PRINT_MASK) ) { |
michael@0 | 220 | /* no printable prefixes */ |
michael@0 | 221 | insn->prefix = insn_no_prefix; |
michael@0 | 222 | } |
michael@0 | 223 | |
michael@0 | 224 | /* concat all prefix strings */ |
michael@0 | 225 | if ( (unsigned int)insn->prefix & PREFIX_LOCK ) { |
michael@0 | 226 | strncat(insn->prefix_string, "lock ", 32 - |
michael@0 | 227 | strlen(insn->prefix_string)); |
michael@0 | 228 | } |
michael@0 | 229 | |
michael@0 | 230 | if ( (unsigned int)insn->prefix & PREFIX_REPNZ ) { |
michael@0 | 231 | strncat(insn->prefix_string, "repnz ", 32 - |
michael@0 | 232 | strlen(insn->prefix_string)); |
michael@0 | 233 | } else if ( (unsigned int)insn->prefix & PREFIX_REPZ ) { |
michael@0 | 234 | strncat(insn->prefix_string, "repz ", 32 - |
michael@0 | 235 | strlen(insn->prefix_string)); |
michael@0 | 236 | } |
michael@0 | 237 | |
michael@0 | 238 | return; |
michael@0 | 239 | } |
michael@0 | 240 | |
michael@0 | 241 | |
michael@0 | 242 | static void reg_32_to_16( x86_op_t *op, x86_insn_t *insn, void *arg ) { |
michael@0 | 243 | |
michael@0 | 244 | /* if this is a 32-bit register and it is a general register ... */ |
michael@0 | 245 | if ( op->type == op_register && op->data.reg.size == 4 && |
michael@0 | 246 | (op->data.reg.type & reg_gen) ) { |
michael@0 | 247 | /* WORD registers are 8 indices off from DWORD registers */ |
michael@0 | 248 | ia32_handle_register( &(op->data.reg), |
michael@0 | 249 | op->data.reg.id + 8 ); |
michael@0 | 250 | } |
michael@0 | 251 | } |
michael@0 | 252 | |
michael@0 | 253 | static void handle_insn_metadata( x86_insn_t *insn, ia32_insn_t *raw_insn ) { |
michael@0 | 254 | ia32_handle_mnemtype( insn, raw_insn->mnem_flag ); |
michael@0 | 255 | ia32_handle_notes( insn, raw_insn->notes ); |
michael@0 | 256 | ia32_handle_eflags( insn, raw_insn->flags_effected ); |
michael@0 | 257 | ia32_handle_cpu( insn, raw_insn->cpu ); |
michael@0 | 258 | ia32_stack_mod( insn ); |
michael@0 | 259 | } |
michael@0 | 260 | |
michael@0 | 261 | static size_t ia32_decode_insn( unsigned char *buf, size_t buf_len, |
michael@0 | 262 | ia32_insn_t *raw_insn, x86_insn_t *insn, |
michael@0 | 263 | unsigned int prefixes ) { |
michael@0 | 264 | size_t size, op_size; |
michael@0 | 265 | unsigned char modrm; |
michael@0 | 266 | |
michael@0 | 267 | /* this should never happen, but just in case... */ |
michael@0 | 268 | if ( raw_insn->mnem_flag == INS_INVALID ) { |
michael@0 | 269 | return 0; |
michael@0 | 270 | } |
michael@0 | 271 | |
michael@0 | 272 | if (ia32_settings.options & opt_16_bit) { |
michael@0 | 273 | insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 4 : 2; |
michael@0 | 274 | insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 4 : 2; |
michael@0 | 275 | } else { |
michael@0 | 276 | insn->op_size = ( prefixes & PREFIX_OP_SIZE ) ? 2 : 4; |
michael@0 | 277 | insn->addr_size = ( prefixes & PREFIX_ADDR_SIZE ) ? 2 : 4; |
michael@0 | 278 | } |
michael@0 | 279 | |
michael@0 | 280 | |
michael@0 | 281 | /* ++++ 1. Copy mnemonic and mnemonic-flags to CODE struct */ |
michael@0 | 282 | if ((ia32_settings.options & opt_att_mnemonics) && raw_insn->mnemonic_att[0]) { |
michael@0 | 283 | strncpy( insn->mnemonic, raw_insn->mnemonic_att, 16 ); |
michael@0 | 284 | } |
michael@0 | 285 | else { |
michael@0 | 286 | strncpy( insn->mnemonic, raw_insn->mnemonic, 16 ); |
michael@0 | 287 | } |
michael@0 | 288 | ia32_handle_prefix( insn, prefixes ); |
michael@0 | 289 | |
michael@0 | 290 | handle_insn_metadata( insn, raw_insn ); |
michael@0 | 291 | |
michael@0 | 292 | /* prefetch the next byte in case it is a modr/m byte -- saves |
michael@0 | 293 | * worrying about whether the 'mod/rm' operand or the 'reg' operand |
michael@0 | 294 | * occurs first */ |
michael@0 | 295 | modrm = GET_BYTE( buf, buf_len ); |
michael@0 | 296 | |
michael@0 | 297 | /* ++++ 2. Decode Explicit Operands */ |
michael@0 | 298 | /* Intel uses up to 3 explicit operands in its instructions; |
michael@0 | 299 | * the first is 'dest', the second is 'src', and the third |
michael@0 | 300 | * is an additional source value (usually an immediate value, |
michael@0 | 301 | * e.g. in the MUL instructions). These three explicit operands |
michael@0 | 302 | * are encoded in the opcode tables, even if they are not used |
michael@0 | 303 | * by the instruction. Additional implicit operands are stored |
michael@0 | 304 | * in a supplemental table and are handled later. */ |
michael@0 | 305 | |
michael@0 | 306 | op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->dest, |
michael@0 | 307 | raw_insn->dest_flag, prefixes, modrm ); |
michael@0 | 308 | /* advance buffer, increase size if necessary */ |
michael@0 | 309 | buf += op_size; |
michael@0 | 310 | buf_len -= op_size; |
michael@0 | 311 | size = op_size; |
michael@0 | 312 | |
michael@0 | 313 | op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->src, |
michael@0 | 314 | raw_insn->src_flag, prefixes, modrm ); |
michael@0 | 315 | buf += op_size; |
michael@0 | 316 | buf_len -= op_size; |
michael@0 | 317 | size += op_size; |
michael@0 | 318 | |
michael@0 | 319 | op_size = ia32_decode_operand( buf, buf_len, insn, raw_insn->aux, |
michael@0 | 320 | raw_insn->aux_flag, prefixes, modrm ); |
michael@0 | 321 | size += op_size; |
michael@0 | 322 | |
michael@0 | 323 | |
michael@0 | 324 | /* ++++ 3. Decode Implicit Operands */ |
michael@0 | 325 | /* apply implicit operands */ |
michael@0 | 326 | ia32_insn_implicit_ops( insn, raw_insn->implicit_ops ); |
michael@0 | 327 | /* we have one small inelegant hack here, to deal with |
michael@0 | 328 | * the two prefixes that have implicit operands. If Intel |
michael@0 | 329 | * adds more, we'll change the algorithm to suit :) */ |
michael@0 | 330 | if ( (prefixes & PREFIX_REPZ) || (prefixes & PREFIX_REPNZ) ) { |
michael@0 | 331 | ia32_insn_implicit_ops( insn, IDX_IMPLICIT_REP ); |
michael@0 | 332 | } |
michael@0 | 333 | |
michael@0 | 334 | |
michael@0 | 335 | /* 16-bit hack: foreach operand, if 32-bit reg, make 16-bit reg */ |
michael@0 | 336 | if ( insn->op_size == 2 ) { |
michael@0 | 337 | x86_operand_foreach( insn, reg_32_to_16, NULL, op_any ); |
michael@0 | 338 | } |
michael@0 | 339 | |
michael@0 | 340 | return size; |
michael@0 | 341 | } |
michael@0 | 342 | |
michael@0 | 343 | |
michael@0 | 344 | /* convenience routine */ |
michael@0 | 345 | #define USES_MOD_RM(flag) \ |
michael@0 | 346 | (flag == ADDRMETH_E || flag == ADDRMETH_M || flag == ADDRMETH_Q || \ |
michael@0 | 347 | flag == ADDRMETH_W || flag == ADDRMETH_R) |
michael@0 | 348 | |
michael@0 | 349 | static int uses_modrm_flag( unsigned int flag ) { |
michael@0 | 350 | unsigned int meth; |
michael@0 | 351 | if ( flag == ARG_NONE ) { |
michael@0 | 352 | return 0; |
michael@0 | 353 | } |
michael@0 | 354 | meth = (flag & ADDRMETH_MASK); |
michael@0 | 355 | if ( USES_MOD_RM(meth) ) { |
michael@0 | 356 | return 1; |
michael@0 | 357 | } |
michael@0 | 358 | |
michael@0 | 359 | return 0; |
michael@0 | 360 | } |
michael@0 | 361 | |
michael@0 | 362 | /* This routine performs the actual byte-by-byte opcode table lookup. |
michael@0 | 363 | * Originally it was pretty simple: get a byte, adjust it to a proper |
michael@0 | 364 | * index into the table, then check the table row at that index to |
michael@0 | 365 | * determine what to do next. But is anything that simple with Intel? |
michael@0 | 366 | * This is now a huge, convoluted mess, mostly of bitter comments. */ |
michael@0 | 367 | /* buf: pointer to next byte to read from stream |
michael@0 | 368 | * buf_len: length of buf |
michael@0 | 369 | * table: index of table to use for lookups |
michael@0 | 370 | * raw_insn: output pointer that receives opcode definition |
michael@0 | 371 | * prefixes: output integer that is encoded with prefixes in insn |
michael@0 | 372 | * returns : number of bytes consumed from stream during lookup */ |
michael@0 | 373 | size_t ia32_table_lookup( unsigned char *buf, size_t buf_len, |
michael@0 | 374 | unsigned int table, ia32_insn_t **raw_insn, |
michael@0 | 375 | unsigned int *prefixes ) { |
michael@0 | 376 | unsigned char *next, op = buf[0]; /* byte value -- 'opcode' */ |
michael@0 | 377 | size_t size = 1, sub_size = 0, next_len; |
michael@0 | 378 | ia32_table_desc_t *table_desc; |
michael@0 | 379 | unsigned int subtable, prefix = 0, recurse_table = 0; |
michael@0 | 380 | |
michael@0 | 381 | table_desc = &ia32_tables[table]; |
michael@0 | 382 | |
michael@0 | 383 | op = GET_BYTE( buf, buf_len ); |
michael@0 | 384 | |
michael@0 | 385 | if ( table_desc->type == tbl_fpu && op > table_desc->maxlim) { |
michael@0 | 386 | /* one of the fucking FPU tables out of the 00-BH range */ |
michael@0 | 387 | /* OK,. this is a bit of a hack -- the proper way would |
michael@0 | 388 | * have been to use subtables in the 00-BF FPU opcode tables, |
michael@0 | 389 | * but that is rather wasteful of space... */ |
michael@0 | 390 | table_desc = &ia32_tables[table +1]; |
michael@0 | 391 | } |
michael@0 | 392 | |
michael@0 | 393 | /* PERFORM TABLE LOOKUP */ |
michael@0 | 394 | |
michael@0 | 395 | /* ModR/M trick: shift extension bits into lowest bits of byte */ |
michael@0 | 396 | /* Note: non-ModR/M tables have a shift value of 0 */ |
michael@0 | 397 | op >>= table_desc->shift; |
michael@0 | 398 | |
michael@0 | 399 | /* ModR/M trick: mask out high bits to turn extension into an index */ |
michael@0 | 400 | /* Note: non-ModR/M tables have a mask value of 0xFF */ |
michael@0 | 401 | op &= table_desc->mask; |
michael@0 | 402 | |
michael@0 | 403 | |
michael@0 | 404 | /* Sparse table trick: check that byte is <= max value */ |
michael@0 | 405 | /* Note: full (256-entry) tables have a maxlim of 155 */ |
michael@0 | 406 | if ( op > table_desc->maxlim ) { |
michael@0 | 407 | /* this is a partial table, truncated at the tail, |
michael@0 | 408 | and op is out of range! */ |
michael@0 | 409 | return INVALID_INSN; |
michael@0 | 410 | } |
michael@0 | 411 | |
michael@0 | 412 | /* Sparse table trick: check that byte is >= min value */ |
michael@0 | 413 | /* Note: full (256-entry) tables have a minlim of 0 */ |
michael@0 | 414 | if ( table_desc->minlim > op ) { |
michael@0 | 415 | /* this is a partial table, truncated at the head, |
michael@0 | 416 | and op is out of range! */ |
michael@0 | 417 | return INVALID_INSN; |
michael@0 | 418 | } |
michael@0 | 419 | /* adjust op to be an offset from table index 0 */ |
michael@0 | 420 | op -= table_desc->minlim; |
michael@0 | 421 | |
michael@0 | 422 | /* Yay! 'op' is now fully adjusted to be an index into 'table' */ |
michael@0 | 423 | *raw_insn = &(table_desc->table[op]); |
michael@0 | 424 | //printf("BYTE %X TABLE %d OP %X\n", buf[0], table, op ); |
michael@0 | 425 | |
michael@0 | 426 | if ( (*raw_insn)->mnem_flag & INS_FLAG_PREFIX ) { |
michael@0 | 427 | prefix = (*raw_insn)->mnem_flag & PREFIX_MASK; |
michael@0 | 428 | } |
michael@0 | 429 | |
michael@0 | 430 | |
michael@0 | 431 | /* handle escape to a multibyte/coproc/extension/etc table */ |
michael@0 | 432 | /* NOTE: if insn is a prefix and has a subtable, then we |
michael@0 | 433 | * only recurse if this is the first prefix byte -- |
michael@0 | 434 | * that is, if *prefixes is 0. |
michael@0 | 435 | * NOTE also that suffix tables are handled later */ |
michael@0 | 436 | subtable = (*raw_insn)->table; |
michael@0 | 437 | |
michael@0 | 438 | if ( subtable && ia32_tables[subtable].type != tbl_suffix && |
michael@0 | 439 | (! prefix || ! *prefixes) ) { |
michael@0 | 440 | |
michael@0 | 441 | if ( ia32_tables[subtable].type == tbl_ext_ext || |
michael@0 | 442 | ia32_tables[subtable].type == tbl_fpu_ext ) { |
michael@0 | 443 | /* opcode extension: reuse current byte in buffer */ |
michael@0 | 444 | next = buf; |
michael@0 | 445 | next_len = buf_len; |
michael@0 | 446 | } else { |
michael@0 | 447 | /* "normal" opcode: advance to next byte in buffer */ |
michael@0 | 448 | if ( buf_len > 1 ) { |
michael@0 | 449 | next = &buf[1]; |
michael@0 | 450 | next_len = buf_len - 1; |
michael@0 | 451 | } |
michael@0 | 452 | else { |
michael@0 | 453 | // buffer is truncated |
michael@0 | 454 | return INVALID_INSN; |
michael@0 | 455 | } |
michael@0 | 456 | } |
michael@0 | 457 | /* we encountered a multibyte opcode: recurse using the |
michael@0 | 458 | * table specified in the opcode definition */ |
michael@0 | 459 | sub_size = ia32_table_lookup( next, next_len, subtable, |
michael@0 | 460 | raw_insn, prefixes ); |
michael@0 | 461 | |
michael@0 | 462 | /* SSE/prefix hack: if the original opcode def was a |
michael@0 | 463 | * prefix that specified a subtable, and the subtable |
michael@0 | 464 | * lookup returned a valid insn, then we have encountered |
michael@0 | 465 | * an SSE opcode definition; otherwise, we pretend we |
michael@0 | 466 | * never did the subtable lookup, and deal with the |
michael@0 | 467 | * prefix normally later */ |
michael@0 | 468 | if ( prefix && ( sub_size == INVALID_INSN || |
michael@0 | 469 | INS_TYPE((*raw_insn)->mnem_flag) == INS_INVALID ) ) { |
michael@0 | 470 | /* this is a prefix, not an SSE insn : |
michael@0 | 471 | * lookup next byte in main table, |
michael@0 | 472 | * subsize will be reset during the |
michael@0 | 473 | * main table lookup */ |
michael@0 | 474 | recurse_table = 1; |
michael@0 | 475 | } else { |
michael@0 | 476 | /* this is either a subtable (two-byte) insn |
michael@0 | 477 | * or an invalid insn: either way, set prefix |
michael@0 | 478 | * to NULL and end the opcode lookup */ |
michael@0 | 479 | prefix = 0; |
michael@0 | 480 | // short-circuit lookup on invalid insn |
michael@0 | 481 | if (sub_size == INVALID_INSN) return INVALID_INSN; |
michael@0 | 482 | } |
michael@0 | 483 | } else if ( prefix ) { |
michael@0 | 484 | recurse_table = 1; |
michael@0 | 485 | } |
michael@0 | 486 | |
michael@0 | 487 | /* by default, we assume that we have the opcode definition, |
michael@0 | 488 | * and there is no need to recurse on the same table, but |
michael@0 | 489 | * if we do then a prefix was encountered... */ |
michael@0 | 490 | if ( recurse_table ) { |
michael@0 | 491 | /* this must have been a prefix: use the same table for |
michael@0 | 492 | * lookup of the next byte */ |
michael@0 | 493 | sub_size = ia32_table_lookup( &buf[1], buf_len - 1, table, |
michael@0 | 494 | raw_insn, prefixes ); |
michael@0 | 495 | |
michael@0 | 496 | // short-circuit lookup on invalid insn |
michael@0 | 497 | if (sub_size == INVALID_INSN) return INVALID_INSN; |
michael@0 | 498 | |
michael@0 | 499 | /* a bit of a hack for branch hints */ |
michael@0 | 500 | if ( prefix & BRANCH_HINT_MASK ) { |
michael@0 | 501 | if ( INS_GROUP((*raw_insn)->mnem_flag) == INS_EXEC ) { |
michael@0 | 502 | /* segment override prefixes are invalid for |
michael@0 | 503 | * all branch instructions, so delete them */ |
michael@0 | 504 | prefix &= ~PREFIX_REG_MASK; |
michael@0 | 505 | } else { |
michael@0 | 506 | prefix &= ~BRANCH_HINT_MASK; |
michael@0 | 507 | } |
michael@0 | 508 | } |
michael@0 | 509 | |
michael@0 | 510 | /* apply prefix to instruction */ |
michael@0 | 511 | |
michael@0 | 512 | /* TODO: implement something enforcing prefix groups */ |
michael@0 | 513 | (*prefixes) |= prefix; |
michael@0 | 514 | } |
michael@0 | 515 | |
michael@0 | 516 | /* if this lookup was in a ModR/M table, then an opcode byte is |
michael@0 | 517 | * NOT consumed: subtract accordingly. NOTE that if none of the |
michael@0 | 518 | * operands used the ModR/M, then we need to consume the byte |
michael@0 | 519 | * here, but ONLY in the 'top-level' opcode extension table */ |
michael@0 | 520 | |
michael@0 | 521 | if ( table_desc->type == tbl_ext_ext ) { |
michael@0 | 522 | /* extensions-to-extensions never consume a byte */ |
michael@0 | 523 | --size; |
michael@0 | 524 | } else if ( (table_desc->type == tbl_extension || |
michael@0 | 525 | table_desc->type == tbl_fpu || |
michael@0 | 526 | table_desc->type == tbl_fpu_ext ) && |
michael@0 | 527 | /* extensions that have an operand encoded in ModR/M |
michael@0 | 528 | * never consume a byte */ |
michael@0 | 529 | (uses_modrm_flag((*raw_insn)->dest_flag) || |
michael@0 | 530 | uses_modrm_flag((*raw_insn)->src_flag) ) ) { |
michael@0 | 531 | --size; |
michael@0 | 532 | } |
michael@0 | 533 | |
michael@0 | 534 | size += sub_size; |
michael@0 | 535 | |
michael@0 | 536 | return size; |
michael@0 | 537 | } |
michael@0 | 538 | |
michael@0 | 539 | static size_t handle_insn_suffix( unsigned char *buf, size_t buf_len, |
michael@0 | 540 | ia32_insn_t *raw_insn, x86_insn_t * insn ) { |
michael@0 | 541 | ia32_table_desc_t *table_desc; |
michael@0 | 542 | ia32_insn_t *sfx_insn; |
michael@0 | 543 | size_t size; |
michael@0 | 544 | unsigned int prefixes = 0; |
michael@0 | 545 | |
michael@0 | 546 | table_desc = &ia32_tables[raw_insn->table]; |
michael@0 | 547 | size = ia32_table_lookup( buf, buf_len, raw_insn->table, &sfx_insn, |
michael@0 | 548 | &prefixes ); |
michael@0 | 549 | if (size == INVALID_INSN || sfx_insn->mnem_flag == INS_INVALID ) { |
michael@0 | 550 | return 0; |
michael@0 | 551 | } |
michael@0 | 552 | |
michael@0 | 553 | strncpy( insn->mnemonic, sfx_insn->mnemonic, 16 ); |
michael@0 | 554 | handle_insn_metadata( insn, sfx_insn ); |
michael@0 | 555 | |
michael@0 | 556 | return 1; |
michael@0 | 557 | } |
michael@0 | 558 | |
michael@0 | 559 | /* invalid instructions are handled by returning 0 [error] from the |
michael@0 | 560 | * function, setting the size of the insn to 1 byte, and copying |
michael@0 | 561 | * the byte at the start of the invalid insn into the x86_insn_t. |
michael@0 | 562 | * if the caller is saving the x86_insn_t for invalid instructions, |
michael@0 | 563 | * instead of discarding them, this will maintain a consistent |
michael@0 | 564 | * address space in the x86_insn_ts */ |
michael@0 | 565 | |
michael@0 | 566 | /* this function is called by the controlling disassembler, so its name and |
michael@0 | 567 | * calling convention cannot be changed */ |
michael@0 | 568 | /* buf points to the loc of the current opcode (start of the |
michael@0 | 569 | * instruction) in the instruction stream. The instruction |
michael@0 | 570 | * stream is assumed to be a buffer of bytes read directly |
michael@0 | 571 | * from the file for the purpose of disassembly; a mem-mapped |
michael@0 | 572 | * file is ideal for * this. |
michael@0 | 573 | * insn points to a code structure to be filled by instr_decode |
michael@0 | 574 | * returns the size of the decoded instruction in bytes */ |
michael@0 | 575 | size_t ia32_disasm_addr( unsigned char * buf, size_t buf_len, |
michael@0 | 576 | x86_insn_t *insn ) { |
michael@0 | 577 | ia32_insn_t *raw_insn = NULL; |
michael@0 | 578 | unsigned int prefixes = 0; |
michael@0 | 579 | size_t size, sfx_size; |
michael@0 | 580 | |
michael@0 | 581 | if ( (ia32_settings.options & opt_ignore_nulls) && buf_len > 3 && |
michael@0 | 582 | !buf[0] && !buf[1] && !buf[2] && !buf[3]) { |
michael@0 | 583 | /* IF IGNORE_NULLS is set AND |
michael@0 | 584 | * first 4 bytes in the intruction stream are NULL |
michael@0 | 585 | * THEN return 0 (END_OF_DISASSEMBLY) */ |
michael@0 | 586 | /* TODO: set errno */ |
michael@0 | 587 | MAKE_INVALID( insn, buf ); |
michael@0 | 588 | return 0; /* 4 00 bytes in a row? This isn't code! */ |
michael@0 | 589 | } |
michael@0 | 590 | |
michael@0 | 591 | /* Perform recursive table lookup starting with main table (0) */ |
michael@0 | 592 | size = ia32_table_lookup(buf, buf_len, idx_Main, &raw_insn, &prefixes); |
michael@0 | 593 | if ( size == INVALID_INSN || size > buf_len || raw_insn->mnem_flag == INS_INVALID ) { |
michael@0 | 594 | MAKE_INVALID( insn, buf ); |
michael@0 | 595 | /* TODO: set errno */ |
michael@0 | 596 | return 0; |
michael@0 | 597 | } |
michael@0 | 598 | |
michael@0 | 599 | /* We now have the opcode itself figured out: we can decode |
michael@0 | 600 | * the rest of the instruction. */ |
michael@0 | 601 | size += ia32_decode_insn( &buf[size], buf_len - size, raw_insn, insn, |
michael@0 | 602 | prefixes ); |
michael@0 | 603 | if ( raw_insn->mnem_flag & INS_FLAG_SUFFIX ) { |
michael@0 | 604 | /* AMD 3DNow! suffix -- get proper operand type here */ |
michael@0 | 605 | sfx_size = handle_insn_suffix( &buf[size], buf_len - size, |
michael@0 | 606 | raw_insn, insn ); |
michael@0 | 607 | if (! sfx_size ) { |
michael@0 | 608 | /* TODO: set errno */ |
michael@0 | 609 | MAKE_INVALID( insn, buf ); |
michael@0 | 610 | return 0; |
michael@0 | 611 | } |
michael@0 | 612 | |
michael@0 | 613 | size += sfx_size; |
michael@0 | 614 | } |
michael@0 | 615 | |
michael@0 | 616 | if (! size ) { |
michael@0 | 617 | /* invalid insn */ |
michael@0 | 618 | MAKE_INVALID( insn, buf ); |
michael@0 | 619 | return 0; |
michael@0 | 620 | } |
michael@0 | 621 | |
michael@0 | 622 | |
michael@0 | 623 | insn->size = size; |
michael@0 | 624 | return size; /* return size of instruction in bytes */ |
michael@0 | 625 | } |