libdasm.h

Go to the documentation of this file.
00001 
00002 /*
00003  * libdasm -- simple x86 disassembly library
00004  * (c) 2004 - 2005  jt / nologin.org
00005  *
00006  * libdasm.h:
00007  * Definitions for structures, functions and other weird stuff
00008  *
00009  */
00010 
00011 
00012 #ifndef _LIBDASM_H
00013 #define _LIBDASM_H
00014 
00015 #ifdef __cplusplus
00016 extern "C" {
00017 #endif
00018 
// Library version, packed one byte per component
// (major, minor1, minor2, minor3 from the most significant byte down).
#define __LIBDASM_VERSION__     0x01040000

// Version component extractors.
// Every expansion is wrapped in its own parentheses so the macros can be
// used inside larger expressions: without them, GET_VERSION_MINOR1 + 1
// would parse as (...) >> (16 + 1) because '+' binds tighter than '>>'.
#define GET_VERSION_MAJOR  \
        ((__LIBDASM_VERSION__ & 0xff000000) >> 24)
#define GET_VERSION_MINOR1 \
        ((__LIBDASM_VERSION__ & 0x00ff0000) >> 16)
#define GET_VERSION_MINOR2 \
        ((__LIBDASM_VERSION__ & 0x0000ff00) >> 8)
#define GET_VERSION_MINOR3 \
        (__LIBDASM_VERSION__ & 0x000000ff)
00029 
00030 // Data types
// Fixed-width aliases used by the rest of this header: BYTE/WORD/DWORD/QWORD
// are the unsigned 8/16/32/64-bit types, SBYTE/SWORD/SDWORD/SQWORD the signed
// counterparts.
00031 
00032 #if _WIN32
// NOTE(review): BYTE, WORD and DWORD are not typedef'd in this branch;
// presumably <windows.h> supplies them -- confirm.
00033 #include <windows.h>
// Map the names used by the sources onto the MSVC spellings.
00034 #define __inline__ __inline
00035 #define snprintf _snprintf
00036 typedef unsigned __int64 QWORD;         // for MSVC
00037 typedef signed   __int8  SBYTE;
00038 typedef signed   __int16 SWORD;
00039 typedef signed   __int32 SDWORD;
00040 typedef signed   __int64 SQWORD;
00041 #else
00042 #if defined __sun
// Sun: provide byte-order macros and the u_intN_t spellings used below.
// BYTE_ORDER is set equal to BIG_ENDIAN, i.e. the target is described as
// big-endian at this point.
// NOTE(review): the numeric values are swapped relative to the usual BSD
// convention (LITTLE_ENDIAN 1234, BIG_ENDIAN 4321). This header only compares
// BYTE_ORDER against LITTLE_ENDIAN, so the relation is what matters here --
// confirm nothing else depends on the raw numbers.
00043 #define BYTE_ORDER 1234
00044 #define BIG_ENDIAN 1234
00045 #define LITTLE_ENDIAN 4321
00046 #define u_int8_t uint8_t
00047 #define u_int16_t uint16_t
00048 #define u_int32_t uint32_t
00049 #define u_int64_t uint64_t
00050 
00051 #endif // __sun
// All non-Windows targets (Sun included) take their integer types from
// <sys/types.h>.
00052 #include <sys/types.h>
00053 typedef u_int8_t  BYTE;
00054 typedef u_int16_t WORD;
00055 typedef u_int32_t DWORD;
00056 typedef u_int64_t QWORD;
00057 typedef int8_t    SBYTE;
00058 typedef int16_t   SWORD;
00059 typedef int32_t   SDWORD;
00060 typedef int64_t   SQWORD;
00061 #endif
00062 
// Define endianness

#ifndef __X86__
// These should catch x86 with most compilers
#if defined _X86_ || defined _i386_ || defined __i386__
#define __X86__
#endif
#endif

#ifndef __LITTLE_ENDIAN__
// Decide whether the target is little-endian, strongest evidence first.
//
// BYTE_ORDER is only compared against LITTLE_ENDIAN when both macros exist:
// inside #if an undefined identifier evaluates to 0, so an unguarded
// (BYTE_ORDER == LITTLE_ENDIAN) degenerates to "0 == 0" -- always true --
// whenever the system headers do not provide them, which would mark
// big-endian targets as little-endian.
#if defined __X86__ || defined _ALPHA_
#define __LITTLE_ENDIAN__
#elif defined BYTE_ORDER && defined LITTLE_ENDIAN
// BSD-style macros from the system headers (or the Sun block above)
#if BYTE_ORDER == LITTLE_ENDIAN
#define __LITTLE_ENDIAN__
#endif
#elif defined __BYTE_ORDER__ && defined __ORDER_LITTLE_ENDIAN__
// Byte-order macros predefined by GCC-compatible compilers
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define __LITTLE_ENDIAN__
#endif
#else
// No byte-order information available at all: keep the historical
// behaviour of this header, which assumed little-endian in that case.
#define __LITTLE_ENDIAN__
#endif
#endif
00078 
00079 
00080 // Registers
// Register numbers 0-7; REGISTER_NOP (8) marks "no register".
00081 #define REGISTER_EAX 0
00082 #define REGISTER_ECX 1
00083 #define REGISTER_EDX 2
00084 #define REGISTER_EBX 3
00085 #define REGISTER_ESP 4
00086 #define REGISTER_EBP 5
00087 #define REGISTER_ESI 6
00088 #define REGISTER_EDI 7
00089 #define REGISTER_NOP 8  // no register defined
00090 
00091 // Register aliases
// Every name in a group expands to the same number as the 32-bit register
// that heads the group, so e.g. REG_AX, REG_AL, REG_ES and REG_ST0 are all 0.
// The register class is not encoded here; see the REGISTER_TYPE_* values.
00092 #define REG_EAX REGISTER_EAX
00093 #define REG_AX REG_EAX
00094 #define REG_AL REG_EAX
00095 #define REG_ES REG_EAX          // Just for reg_table consistency
00096 #define REG_ST0 REG_EAX         // Just for reg_table consistency
00097 #define REG_ECX REGISTER_ECX
00098 #define REG_CX REG_ECX
00099 #define REG_CL REG_ECX
00100 #define REG_CS REG_ECX
00101 #define REG_ST1 REG_ECX
00102 #define REG_EDX REGISTER_EDX
00103 #define REG_DX REG_EDX
00104 #define REG_DL REG_EDX
00105 #define REG_SS REG_EDX
00106 #define REG_ST2 REG_EDX
00107 #define REG_EBX REGISTER_EBX
00108 #define REG_BX REG_EBX
00109 #define REG_BL REG_EBX
00110 #define REG_DS REG_EBX
00111 #define REG_ST3 REG_EBX
00112 #define REG_ESP REGISTER_ESP
00113 #define REG_SP REG_ESP
00114 #define REG_AH REG_ESP          // Just for reg_table consistency
00115 #define REG_FS REG_ESP
00116 #define REG_ST4 REG_ESP
00117 #define REG_EBP REGISTER_EBP
00118 #define REG_BP REG_EBP
00119 #define REG_CH REG_EBP
00120 #define REG_GS REG_EBP
00121 #define REG_ST5 REG_EBP
// No segment-register alias is defined for numbers 6 and 7.
00122 #define REG_ESI REGISTER_ESI
00123 #define REG_SI REG_ESI
00124 #define REG_DH REG_ESI
00125 #define REG_ST6 REG_ESI
00126 #define REG_EDI REGISTER_EDI
00127 #define REG_DI REG_EDI
00128 #define REG_BH REG_EDI
00129 #define REG_ST7 REG_EDI
00130 #define REG_NOP REGISTER_NOP
00131 
00132 // Register types
// NOTE(review): presumably these are the values returned by
// get_register_type() -- confirm against the implementation.
00133 #define REGISTER_TYPE_GEN       1
00134 #define REGISTER_TYPE_SEGMENT   2
00135 #define REGISTER_TYPE_DEBUG     3
00136 #define REGISTER_TYPE_CONTROL   4
00137 #define REGISTER_TYPE_TEST      5
00138 #define REGISTER_TYPE_XMM       6
00139 #define REGISTER_TYPE_MMX       7
00140 #define REGISTER_TYPE_FPU       8
00141 
00142 // Disassembling mode
// Passed to get_instruction() and stored in INSTRUCTION.mode.
// Enumerators are implicitly numbered: MODE_32 is 0, MODE_16 is 1.
00143 enum Mode {
00144         MODE_32,        // 32-bit
00145         MODE_16         // 16-bit
00146 };
00147 
// Disassembling format
// Selects the output syntax of the get_*_string() functions.
// There is deliberately no comma after the last enumerator: a trailing comma
// in an enumerator list is only accepted from C99 / C++11 onwards, and this
// header is written to be included from both C and C++.
enum Format {
        FORMAT_ATT,     // AT&T syntax (value 0)
        FORMAT_INTEL    // Intel syntax (value 1)
};
00153 
00154 // Instruction types (just the most common ones atm)
// Stored in INSTRUCTION.type. Enumerators are implicitly numbered from 0 in
// declaration order, so inserting or reordering entries changes the value of
// every later one.
00155 enum Instruction {
00156         // Integer instructions
00157         INSTRUCTION_TYPE_ASC,   // aaa, aam, etc.
00158         INSTRUCTION_TYPE_DCL,   // daa, das
00159         INSTRUCTION_TYPE_MOV,
00160         INSTRUCTION_TYPE_MOVSR, // segment register
00161         INSTRUCTION_TYPE_ADD,
00162         INSTRUCTION_TYPE_XADD,
00163         INSTRUCTION_TYPE_ADC,
00164         INSTRUCTION_TYPE_SUB,
00165         INSTRUCTION_TYPE_SBB,
00166         INSTRUCTION_TYPE_INC,
00167         INSTRUCTION_TYPE_DEC,
00168         INSTRUCTION_TYPE_DIV,
00169         INSTRUCTION_TYPE_IDIV,
00170         INSTRUCTION_TYPE_NOT,
00171         INSTRUCTION_TYPE_NEG,
00172         INSTRUCTION_TYPE_STOS,
00173         INSTRUCTION_TYPE_LODS,
00174         INSTRUCTION_TYPE_SCAS,
00175         INSTRUCTION_TYPE_MOVS,
00176         INSTRUCTION_TYPE_MOVSX,
00177         INSTRUCTION_TYPE_MOVZX,
00178         INSTRUCTION_TYPE_CMPS,
00179         INSTRUCTION_TYPE_SHX,   // signed/unsigned shift left/right
00180         INSTRUCTION_TYPE_ROX,   // signed/unsigned rotate left/right
00181         INSTRUCTION_TYPE_MUL,
00182         INSTRUCTION_TYPE_IMUL,
00183         INSTRUCTION_TYPE_EIMUL, // "extended" imul with 2-3 operands
00184         INSTRUCTION_TYPE_XOR,
00185         INSTRUCTION_TYPE_LEA,
00186         INSTRUCTION_TYPE_XCHG,
00187         INSTRUCTION_TYPE_CMP,
00188         INSTRUCTION_TYPE_TEST,
00189         INSTRUCTION_TYPE_PUSH,
00190         INSTRUCTION_TYPE_AND,
00191         INSTRUCTION_TYPE_OR,
00192         INSTRUCTION_TYPE_POP,
00193         INSTRUCTION_TYPE_JMP,
00194         INSTRUCTION_TYPE_JMPC,  // conditional jump
00195         INSTRUCTION_TYPE_SETC,  // conditional byte set
00196         INSTRUCTION_TYPE_MOVC,  // conditional mov
00197         INSTRUCTION_TYPE_LOOP,
00198         INSTRUCTION_TYPE_CALL,
00199         INSTRUCTION_TYPE_RET,
00200         INSTRUCTION_TYPE_INT,   // interrupt
00201         INSTRUCTION_TYPE_BT,    // bit tests
00202         INSTRUCTION_TYPE_BTS,
00203         INSTRUCTION_TYPE_BTR,
00204         INSTRUCTION_TYPE_BTC,
00205         INSTRUCTION_TYPE_BSF,
00206         INSTRUCTION_TYPE_BSR,
00207         INSTRUCTION_TYPE_BSWAP,
00208         INSTRUCTION_TYPE_SGDT,
00209         INSTRUCTION_TYPE_SIDT,
00210         INSTRUCTION_TYPE_SLDT,
00211         INSTRUCTION_TYPE_LFP,
00212         // FPU instructions
00213         INSTRUCTION_TYPE_FCMOVC, // float conditional mov
00214         INSTRUCTION_TYPE_FADD,
00215         INSTRUCTION_TYPE_FADDP,
00216         INSTRUCTION_TYPE_FIADD,
00217         INSTRUCTION_TYPE_FSUB,
00218         INSTRUCTION_TYPE_FSUBP,
00219         INSTRUCTION_TYPE_FISUB,
00220         INSTRUCTION_TYPE_FSUBR,
00221         INSTRUCTION_TYPE_FSUBRP,
00222         INSTRUCTION_TYPE_FISUBR,
00223         INSTRUCTION_TYPE_FMUL,
00224         INSTRUCTION_TYPE_FMULP,
00225         INSTRUCTION_TYPE_FIMUL,
00226         INSTRUCTION_TYPE_FDIV,
00227         INSTRUCTION_TYPE_FDIVP,
00228         INSTRUCTION_TYPE_FDIVR,
00229         INSTRUCTION_TYPE_FDIVRP,
00230         INSTRUCTION_TYPE_FIDIV,
00231         INSTRUCTION_TYPE_FIDIVR,
00232         INSTRUCTION_TYPE_FCOM,
00233         INSTRUCTION_TYPE_FCOMP,
00234         INSTRUCTION_TYPE_FCOMPP,
00235         INSTRUCTION_TYPE_FCOMI,
00236         INSTRUCTION_TYPE_FCOMIP,
00237         INSTRUCTION_TYPE_FUCOM,
00238         INSTRUCTION_TYPE_FUCOMP,
00239         INSTRUCTION_TYPE_FUCOMPP,
00240         INSTRUCTION_TYPE_FUCOMI,
00241         INSTRUCTION_TYPE_FUCOMIP,
00242         INSTRUCTION_TYPE_FST,
00243         INSTRUCTION_TYPE_FSTP,
00244         INSTRUCTION_TYPE_FIST,
00245         INSTRUCTION_TYPE_FISTP,
00246         INSTRUCTION_TYPE_FISTTP,
00247         INSTRUCTION_TYPE_FLD,
00248         INSTRUCTION_TYPE_FILD,
00249         INSTRUCTION_TYPE_FICOM,
00250         INSTRUCTION_TYPE_FICOMP,
00251         INSTRUCTION_TYPE_FFREE,
00252         INSTRUCTION_TYPE_FFREEP,
00253         INSTRUCTION_TYPE_FXCH,
00254         INSTRUCTION_TYPE_FPU,   // Other FPU instructions
00255 
00256         INSTRUCTION_TYPE_MMX,   // Other MMX instructions
00257 
00258         INSTRUCTION_TYPE_SSE,   // Other SSE instructions
00259 
00260         INSTRUCTION_TYPE_OTHER, // Other instructions :-)
00261         INSTRUCTION_TYPE_PRIV   // Privileged instruction
00262 };
00263 
// Operand types
// Stored in OPERAND.type. OPERAND_TYPE_NONE is 0, so a zero-filled OPERAND
// reads as "operand not present".
// There is deliberately no comma after the last enumerator: a trailing comma
// in an enumerator list is only accepted from C99 / C++11 onwards, and this
// header is written to be included from both C and C++.
enum Operand {
        OPERAND_TYPE_NONE,      // operand not present
        OPERAND_TYPE_MEMORY,    // memory operand ([eax], [0], etc.)
        OPERAND_TYPE_REGISTER,  // register operand (eax, mm0, etc.)
        OPERAND_TYPE_IMMEDIATE  // immediate operand (0x1234)
};
00271 
00272 // Structure definitions
00273 
00274 // struct INST is used internally by the library
// One entry of an instruction table; INSTRUCTION.ptr points at one of these.
// NOTE(review): 'type' presumably combines an enum Instruction value with the
// TYPE_* flag bits (see MASK_TYPE_VALUE / MASK_TYPE_FLAGS), and flags1..3
// presumably hold AM_* | OT_* | P_* | F_* operand flags -- confirm against
// the tables that instantiate this struct.
00275 typedef struct _INST {
00276         DWORD type;             // Instruction type and flags
00277         const char *mnemonic;   // Instruction mnemonic
00278         int flags1;             // First operand flags (if any)
00279         int flags2;             // Second operand flags (if any)
00280         int flags3;             // Additional operand flags (if any)
00281         int modrm;              // Is MODRM byte present?
00282 } INST, *PINST;
00283 
00284 // Operands for the instruction
// Description of a single operand; INSTRUCTION embeds three of these
// (op1, op2, op3). A *bytes field of 0 means the corresponding part is absent.
// NOTE(review): the *offset fields are presumably byte offsets from the start
// of the instruction -- confirm against the decoder.
00285 typedef struct _OPERAND {
00286         enum Operand type;      // Operand type (register, memory, etc)
00287         int reg;                // Register (if any)
00288         int basereg;            // Base register (if any)
00289         int indexreg;           // Index register (if any)
00290         int scale;              // Scale (if any)
00291         int dispbytes;          // Displacement bytes (0 = no displacement)
00292         int dispoffset;         // Displacement value offset
00293         int immbytes;           // Immediate bytes (0 = no immediate)
00294         int immoffset;          // Immediate value offset
00295         int sectionbytes;       // Section prefix bytes (0 = no section prefix)
00296         WORD section;           // Section prefix value
00297         DWORD displacement;     // Displacement value
00298         DWORD immediate;        // Immediate value
00299         int flags;              // Operand flags
00300 } OPERAND, *POPERAND;
00301 
00302 // struct INSTRUCTION is used to interface the library
// Callers hand a pointer to one of these to get_instruction() and then to the
// get_*_string() functions declared below.
// NOTE(review): 'flags' presumably holds the PREFIX_* bits defined further
// down in this header -- confirm against the decoder.
00303 typedef struct _INSTRUCTION {
00304         int length;             // Instruction length
00305         enum Instruction type;  // Instruction type
00306         enum Mode mode;         // Addressing mode
00307         BYTE opcode;            // Actual opcode
00308         BYTE modrm;             // MODRM byte
00309         BYTE sib;               // SIB byte
00310         int extindex;           // Extension table index
00311         int fpuindex;           // FPU table index
00312         int dispbytes;          // Displacement bytes (0 = no displacement)
00313         int immbytes;           // Immediate bytes (0 = no immediate)
00314         int sectionbytes;       // Section prefix bytes (0 = no section prefix)
00315         OPERAND op1;            // First operand (if any)
00316         OPERAND op2;            // Second operand (if any)
00317         OPERAND op3;            // Additional operand (if any)
00318         PINST ptr;              // Pointer to instruction table
00319         int flags;              // Instruction flags
00320 } INSTRUCTION, *PINSTRUCTION;
00321 
00322 
00323 // Function declarations
// The meaning of the int return values is not stated in this header; check
// the implementation before relying on them.
00324 
// Fill an INSTRUCTION structure from a code buffer
// NOTE(review): presumably decodes exactly one instruction starting at
// 'addr'; the header does not say how many bytes may be read -- confirm.
00325 int get_instruction(
00326         INSTRUCTION *inst,      // pointer to INSTRUCTION structure
00327         BYTE *addr,             // code buffer
00328         enum Mode mode          // mode: MODE_32 or MODE_16
00329 );
00330 
00331 // Get complete instruction string
00332 int get_instruction_string(
00333         INSTRUCTION *inst,      // pointer to INSTRUCTION structure
00334         enum Format format,     // instruction format: FORMAT_ATT or FORMAT_INTEL
00335         DWORD offset,           // instruction absolute address
00336         char *string,           // string buffer
00337         int length              // string length
00338 );
00339 
00340 // Get mnemonic string
00341 int get_mnemonic_string(
00342         INSTRUCTION *inst,      // pointer to INSTRUCTION structure
00343         enum Format format,     // instruction format: FORMAT_ATT or FORMAT_INTEL
00344         char *string,           // string buffer
00345         int length              // string length
00346 );
00347 
00348 // Get individual operand string
00349 int get_operand_string(
00350         INSTRUCTION *inst,      // pointer to INSTRUCTION structure
00351         POPERAND op,            // pointer to OPERAND structure
00352         enum Format format,     // instruction format: FORMAT_ATT or FORMAT_INTEL
00353         DWORD offset,           // instruction absolute address
00354         char *string,           // string buffer
00355         int length              // string length
00356 );
00357 
00358 // Helper functions
// Accessors taking a pointer to an OPERAND (or, for the last two, to an
// INSTRUCTION). What each int return value means is not documented in this
// header; check the implementation before relying on it.
00359 
// NOTE(review): presumably returns one of the REGISTER_TYPE_* values -- confirm.
00360 int get_register_type(
00361         POPERAND op
00362 );
// NOTE(review): presumably returns an enum Operand value -- confirm.
00363 int get_operand_type(
00364         POPERAND op
00365 );
00366 int get_operand_register(
00367         POPERAND op
00368 );
00369 int get_operand_basereg(
00370         POPERAND op
00371 );
00372 int get_operand_indexreg(
00373         POPERAND op
00374 );
00375 int get_operand_scale(
00376         POPERAND op
00377 );
// The two functions below hand their value back through an out-parameter;
// the int result is separate from the value itself.
00378 int get_operand_immediate(
00379         POPERAND op,
00380         DWORD *imm              // returned immediate value
00381 );
00382 int get_operand_displacement(
00383         POPERAND op,
00384         DWORD *disp             // returned displacement value
00385 );
// NOTE(review): which of op1/op2/op3 counts as source or destination is
// decided by the implementation -- confirm there.
00386 POPERAND get_source_operand(
00387         PINSTRUCTION inst
00388 );
00389 POPERAND get_destination_operand(
00390         PINSTRUCTION inst
00391 );
00392 
00393 
// Instruction flags (prefixes)
//
// Bit layout of the prefix flag word:
//   bits 24-31  group 1 (lock / repeat)
//   bits 16-23  group 2 (segment override)
//   bits  8-11  operand-size override
//   bits 12-15  address-size override
//
// Every MASK_PREFIX_* expansion is wrapped in its own parentheses so that the
// result can be used inside a larger expression: without them,
// MASK_PREFIX_G1(f) + 1 would parse as (...) >> (24 + 1) because '+' binds
// tighter than '>>'.

// Group 1
#define MASK_PREFIX_G1(x) (((x) & 0xff000000) >> 24)
#define PREFIX_LOCK                     0x01000000      // 0xf0
#define PREFIX_REPNE                    0x02000000      // 0xf2
#define PREFIX_REP                      0x03000000      // 0xf3
#define PREFIX_REPE                     0x03000000      // 0xf3 (same value as PREFIX_REP)
// Group 2
#define MASK_PREFIX_G2(x) (((x) & 0x00ff0000) >> 16)
#define PREFIX_ES_OVERRIDE              0x00010000      // 0x26
#define PREFIX_CS_OVERRIDE              0x00020000      // 0x2e
#define PREFIX_SS_OVERRIDE              0x00030000      // 0x36
#define PREFIX_DS_OVERRIDE              0x00040000      // 0x3e
#define PREFIX_FS_OVERRIDE              0x00050000      // 0x64
#define PREFIX_GS_OVERRIDE              0x00060000      // 0x65
// Group 3 & 4
#define MASK_PREFIX_G3(x)        (((x) & 0x0000ff00) >> 8)
#define MASK_PREFIX_OPERAND(x)   (((x) & 0x00000f00) >> 8)
#define MASK_PREFIX_ADDR(x)      (((x) & 0x0000f000) >> 12)
#define PREFIX_OPERAND_SIZE_OVERRIDE    0x00000100      // 0x66
#define PREFIX_ADDR_SIZE_OVERRIDE       0x00001000      // 0x67
00416 
00417 // Extensions
// MASK_EXT keeps the low 8 bits of its argument; every EXT_* value below fits
// in that byte.
// NOTE(review): presumably the result selects an opcode extension table
// (cf. INSTRUCTION.extindex) -- confirm against the decoder.
00418 
00419 #define MASK_EXT(x) ((x) & 0x000000ff)
00420 #define EXT_G1_1        0x00000001
00421 #define EXT_G1_2        0x00000002
00422 #define EXT_G1_3        0x00000003
00423 #define EXT_G2_1        0x00000004
00424 #define EXT_G2_2        0x00000005
00425 #define EXT_G2_3        0x00000006
00426 #define EXT_G2_4        0x00000007
00427 #define EXT_G2_5        0x00000008
00428 #define EXT_G2_6        0x00000009
00429 #define EXT_G3_1        0x0000000a
00430 #define EXT_G3_2        0x0000000b
00431 #define EXT_G4          0x0000000c
00432 #define EXT_G5          0x0000000d
00433 #define EXT_G6          0x0000000e
00434 #define EXT_G7          0x0000000f
00435 #define EXT_G8          0x00000010
00436 #define EXT_G9          0x00000011
00437 #define EXT_GA          0x00000012
00438 #define EXT_GB          0x00000013
00439 #define EXT_GC          0x00000014
00440 #define EXT_GD          0x00000015
00441 #define EXT_GE          0x00000016
00442 #define EXT_GF          0x00000017
00443 #define EXT_G0          0x00000018
00444 
00445 // Extra groups for 2 and 3-byte opcodes, and FPU stuff
00446 #define EXT_T2          0x00000020      // opcode table 2
00447 #define EXT_CP          0x00000030      // co-processor
00448 
00449 // Instruction type flags
// MASK_TYPE_FLAGS keeps the top byte of a type word, MASK_TYPE_VALUE the low
// 24 bits. TYPE_3 is the only flag defined here and lies in the top byte.
// NOTE(review): presumably applied to INST.type ("Instruction type and
// flags"); what TYPE_3 signals is not stated in this header -- confirm.
00450 
00451 #define TYPE_3          0x80000000
00452 #define MASK_TYPE_FLAGS(x) ((x) & 0xff000000)
00453 #define MASK_TYPE_VALUE(x) ((x) & 0x00ffffff)
00454 
00455 
00456 // Operand flags
// Bit layout of an operand flag word, as given by the MASK_* macros below:
//   bits 24-31  operand type       (OT_*, MASK_OT)
//   bits 16-23  addressing method  (AM_*, MASK_AM)
//   bits 12-15  permissions        (P_*,  MASK_PERMS)
//   bits  8-11  additional flags   (F_*,  MASK_FLAGS)
//   bits  4-7   unused
//   bits  0-3   register           (MASK_REG)
// None of the masks shift their result, so compare a masked value directly
// against the AM_* / OT_* constants.
00457 
00458 #define FLAGS_NONE 0
00459 
00460 // Operand Addressing Methods, from the Intel manual
00461 #define MASK_AM(x) ((x) & 0x00ff0000)
00462 #define AM_A 0x00010000         // Direct address with segment prefix
00463 #define AM_C 0x00020000         // MODRM reg field defines control register
00464 #define AM_D 0x00030000         // MODRM reg field defines debug register
00465 #define AM_E 0x00040000         // MODRM byte defines reg/memory address
00466 #define AM_G 0x00050000         // MODRM byte defines general-purpose reg
00467 #define AM_I 0x00060000         // Immediate data follows
00468 #define AM_J 0x00070000         // Immediate value is relative to EIP
00469 #define AM_M 0x00080000         // MODRM mod field can refer only to memory
00470 #define AM_O 0x00090000         // Displacement follows (without modrm/sib)
00471 #define AM_P 0x000a0000         // MODRM reg field defines MMX register
00472 #define AM_Q 0x000b0000         // MODRM defines MMX register or memory
00473 #define AM_R 0x000c0000         // MODRM mod field can only refer to register
00474 #define AM_S 0x000d0000         // MODRM reg field defines segment register
00475 #define AM_T 0x000e0000         // MODRM reg field defines test register
00476 #define AM_V 0x000f0000         // MODRM reg field defines XMM register
00477 #define AM_W 0x00100000         // MODRM defines XMM register or memory
00478 // Extra addressing modes used in this implementation
00479 #define AM_I1  0x00200000       // Immediate byte 1 encoded in instruction
00480 #define AM_REG 0x00210000       // Register encoded in instruction
00481 #define AM_IND 0x00220000       // Register indirect encoded in instruction
00482 
00483 // Operand Types, from the Intel manual
00484 #define MASK_OT(x) ((x) & 0xff000000)
00485 #define OT_a  0x01000000
00486 #define OT_b  0x02000000        // always 1 byte
00487 #define OT_c  0x03000000        // byte or word, depending on operand
00488 #define OT_d  0x04000000        // double-word
00489 #define OT_q  0x05000000        // quad-word
00490 #define OT_dq 0x06000000        // double quad-word
00491 #define OT_v  0x07000000        // word or double-word, depending on operand
00492 #define OT_w  0x08000000        // always word
00493 #define OT_p  0x09000000        // 32-bit or 48-bit pointer
00494 #define OT_pi 0x0a000000        // quadword MMX register
00495 #define OT_pd 0x0b000000        // 128-bit double-precision float
00496 #define OT_ps 0x0c000000        // 128-bit single-precision float
00497 #define OT_s  0x0d000000        // 6-byte pseudo descriptor
00498 #define OT_sd 0x0e000000        // Scalar of 128-bit double-precision float
00499 #define OT_ss 0x0f000000        // Scalar of 128-bit single-precision float
00500 #define OT_si 0x10000000        // Doubleword integer register
00501 #define OT_t  0x11000000        // 80-bit packed FP data
00502 
00503 // Operand permissions
// The three permissions are distinct bits and can be OR-ed together.
00504 #define MASK_PERMS(x) ((x) & 0x0000f000)
00505 #define P_r   0x00004000        // Read
00506 #define P_w   0x00002000        // Write
00507 #define P_x   0x00001000        // Execute
00508 
00509 // Additional operand flags
// Also distinct bits, so they can be OR-ed together.
00510 #define MASK_FLAGS(x) ((x) & 0x00000f00)
00511 #define F_s   0x00000100        // sign-extend 1-byte immediate
00512 #define F_r   0x00000200        // use segment register
00513 #define F_f   0x00000400        // use FPU register
00514 
00515 // Mask 0x000000f0 unused atm
00516 
00517 // Operand register mask
00518 #define MASK_REG(x) ((x) & 0x0000000f)
00519 
00520 
00521 
// MODRM byte fields: mod = bits 7-6, reg = bits 5-3, rm = bits 2-0.
// Each macro returns its field right-aligned.
#define MASK_MODRM_MOD(x) (((x) & (0x3 << 6)) >> 6)
#define MASK_MODRM_REG(x) (((x) & (0x7 << 3)) >> 3)
#define MASK_MODRM_RM(x)  ((x) & 0x07)

// SIB byte fields use the same 2/3/3 bit split as MODRM:
// scale = bits 7-6, index = bits 5-3, base = bits 2-0.
#define MASK_SIB_SCALE(x) (((x) & (0x3 << 6)) >> 6)
#define MASK_SIB_INDEX(x) (((x) & (0x7 << 3)) >> 3)
#define MASK_SIB_BASE(x)  ((x) & 0x07)
00531 
00532 
00533 #ifdef __cplusplus
00534 }
00535 #endif
00536 
00537 #endif

Generated on Sun Jan 9 16:47:44 2011 for libemu by  doxygen 1.6.1