#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// Minimal 8086 disassembler: reads a raw binary given on the command line and
// writes NASM-compatible text to stdout and to "output.asm".
//
// Decoded instruction families: mov (register/memory, immediate, accumulator),
// add/sub/cmp (register/memory, immediate, accumulator), conditional jumps and
// the loop/jcxz group. Any other opcode byte is skipped silently.
//
//TODO: Pass file struct and currently read bytes via param. The parse
//      functions already accept both; a NULL file struct falls back to the
//      global `bin`, and `bytes_read` is currently unused.

// Opcode prefixes of the two jump families, compared against the first byte
// shifted right by 4 (JX) or by 2 (LOOPX).
typedef enum COND_JMP_OPC
{
    JX    = 0b0111,
    LOOPX = 0b111000,
} JMP_OPC;

// Low two bits of a LOOPX opcode; doubles as index into LOOPX_INST_TXT.
typedef enum LOOPX_INST
{
    LOOP   = 0b10,
    LOOPZ  = 0b01,
    LOOPNZ = 0b00,
    JCXZ   = 0b11
} LOOPX_INST;

#define LOOPX_INST_TXT_LEN 7
char LOOPX_INST_TXT[0b100][LOOPX_INST_TXT_LEN] =
{
    "loopnz", "loopz", "loop", "jcxz"
};

// Low four bits of a JX opcode; doubles as index into JX_INST_TXT.
typedef enum JX_INST
{
    JZ   = 0b0100,
    JL   = 0b1100,
    JLE  = 0b1110,
    JB   = 0b0010,
    JBE  = 0b0110,
    JP   = 0b1010,
    JO   = 0b0000,
    JS   = 0b1000,
    JNE  = 0b0101,
    JNL  = 0b1101,
    JNLE = 0b1111,
    JNB  = 0b0011,
    JNBE = 0b0111,
    JNP  = 0b1011,
    JNO  = 0b0001,
    JNS  = 0b1001,
} JX_INST;

#define JX_INST_TXT_LEN 5
char JX_INST_TXT[0b10000][JX_INST_TXT_LEN] =
{
    "jo", "jno", "jb", "jnb", "jz", "jne", "jbe", "jnbe",
    "js", "jns", "jp", "jnp", "jl", "jnl", "jle", "jnle"
};

// Three-bit arithmetic selector. Only these three operations have a mnemonic;
// every other selector maps to an empty string in ARITHMETIC_OP_TXT.
typedef enum ARITHMETIC_OP
{
    ADD = 0b000,
    CMP = 0b111,
    SUB = 0b101
} ARITHMETIC_OP;

#define ARITHMETIC_OP_TXT_LEN 4
char ARITHMETIC_OP_TXT[0b1000][ARITHMETIC_OP_TXT_LEN] =
{
    [0b000] = "add",
    [0b111] = "cmp",
    [0b101] = "sub"
};

// Masks that blank out the variable bits (operation selector) of the first
// byte before it is compared against an ASC_INST pattern.
typedef enum ASC_INST_MASK
{
    ASC_REGM_R_M = 0b110001,  // applied to byte >> 2 (d w dropped)
    ASC_I_A_M    = 0b1100011  // applied to byte >> 1 (w dropped)
} ARITHMETIC_INST_MASK;

// add/sub/cmp opcode patterns.
typedef enum ASC_INST
{
    ASC_REGM_R = 0b0,         // reg/mem with register, compared after >> 2
    ASC_I_RM   = 0b100000,    // immediate to reg/mem,  compared after >> 2
    ASC_I_A    = 0b0000010    // immediate to accumulator, compared after >> 1
} ARITHMETIC_INST;

// mov opcode patterns. The segment-register forms are listed but not decoded.
typedef enum DT_INSTRUCTIONS
{
    MOV_RM_TF_R = 0b100010,   // d w
    MOV_I_T_RM  = 0b1100011,  // w
    MOV_I_T_R   = 0b1011,     // w reg
    MOV_M_T_A   = 0b1010000,  // w
    MOV_A_T_M   = 0b1010001,  // w
    MOV_RM_T_SR = 0b10001110,
    MOV_SR_T_RM = 0b10001100,
} DT_INSTRUCTIONS;

// Values of the two-bit MOD field in the second instruction byte.
enum REGISTER_MOD
{
    MEM_NO_DISP    = 0b00,
    MEM_8BIT_DISP  = 0b01,
    MEM_16BIT_DISP = 0b10,
    REG_NO_DISP    = 0b11
};

// Register names indexed by (3-bit register code + 8 * W):
// entries 0..7 are the byte registers, 8..15 the word registers.
#define REG_ENCODING_TXT_LEN 3
char REG_ENCODING_TXT[0b10000][REG_ENCODING_TXT_LEN] =
{
    "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di"
};

// Effective-address base expressions indexed by the 3-bit R/M field.
#define EA_ENCODING_TXT_LEN 8
char EA_ENCODING_TXT[0b1000][EA_ENCODING_TXT_LEN] =
{
    "bx + si", "bx + di", "bp + si", "bp + di", "si", "di", "bp", "bx"
};

// Operand-size keyword indexed by the W bit.
#define WORD_SIGNAL_LEN 6
char WORD_SIGNAL_TXT[0b10][WORD_SIGNAL_LEN] =
{
    "byte ", "word "
};

// Longest memory operand is "[bp + di + 32767]" plus the terminator.
#define EA_STRING_CAP 18
// Generous upper bound for one formatted output line.
#define EMIT_LINE_CAP 64
// R/M value that, together with MOD == 00, selects a bare 16-bit address.
#define DIRECT_ADDRESS_RM 0b110

typedef struct
{
    FILE *binary;
    uint64_t size;
} binary_data;

FILE *output;
binary_data bin;

typedef struct
{
    char* string;
    uint64_t len;
} output_string;

// Fetches the next byte of the input stream. A NULL `binary` selects the
// global `bin`. Returns 0 when the stream is missing or exhausted, so a
// truncated trailing instruction decodes with zero operands instead of
// reading indeterminate memory.
static uint8_t read_next_byte(binary_data *binary)
{
    binary_data *source = (binary != NULL) ? binary : &bin;
    uint8_t value = 0;

    if (source->binary == NULL ||
        fread(&value, sizeof value, 1, source->binary) != 1)
    {
        return 0;
    }
    return value;
}

// Formats one line and writes it to stdout and, when it is open, to `output`.
static void emit(const char *fmt, ...)
{
    char line[EMIT_LINE_CAP];
    va_list args;

    va_start(args, fmt);
    vsnprintf(line, sizeof line, fmt, args);
    va_end(args);

    fputs(line, stdout);
    if (output != NULL)
    {
        fputs(line, output);
    }
}

// Combines two bytes into a signed 16-bit value.
//
// high_order / low_order: the two bytes of the value.
// lo_only: when true and bit 7 of low_order is set, high_order is replaced by
//          0xFF, i.e. low_order is sign-extended. When bit 7 is clear the
//          passed high_order is used unchanged (callers pass 0).
//
// The value is built arithmetically, so the result does not depend on the
// byte order of the host.
int16_t calc_effective_disp(uint8_t high_order, uint8_t low_order, bool lo_only)
{
    const uint8_t low_order_neg_mask = 0b10000000;

    if (lo_only && (low_order & low_order_neg_mask))
    {
        high_order = 0xFF;
    }

    int32_t raw = ((int32_t)high_order << 8) | (int32_t)low_order;
    if (raw >= 0x8000)
    {
        raw -= 0x10000;
    }
    return (int16_t)raw;
}

// Writes "[content]", "[content + N]", "[content -N]" or "[N]" into
// str->string and NUL-terminates it.
//
// str:          destination; str->string must have room for the result
//               (EA_STRING_CAP bytes cover every operand this file builds).
//               str->string is left pointing at the start of the buffer and
//               str->len grows by the number of bytes written, terminator
//               included.
// content:      base expression, may be empty.
// displacement: appended when non-zero; also printed when `content` is empty
//               so that an empty operand "[]" is never produced.
void fill_ea_string(output_string *str, const char *content, int16_t displacement)
{
    char *cursor = str->string;
    size_t content_len = strlen(content);

    *cursor++ = '[';
    memcpy(cursor, content, content_len);
    cursor += content_len;

    if (displacement != 0 || content_len == 0)
    {
        if (content_len > 0)
        {
            *cursor++ = ' ';
            if (displacement >= 0)
            {
                *cursor++ = '+';
                *cursor++ = ' ';
            }
            // A negative value brings its own '-' from the conversion below.
        }
        cursor += sprintf(cursor, "%d", displacement);
    }

    *cursor++ = ']';
    *cursor = '\0';

    str->len += (uint64_t)(cursor - str->string) + 1;
}

// Reads the displacement bytes implied by MOD and R/M and returns them as a
// signed value; `*consumed` is advanced by the number of bytes read.
// MOD 01 reads one sign-extended byte. MOD 10 and the direct-address case
// (MOD 00, R/M 110) read two bytes, low byte first. Otherwise nothing is read.
static int16_t read_displacement(binary_data *binary, uint8_t mod, uint8_t rm, int *consumed)
{
    if (mod == MEM_8BIT_DISP)
    {
        uint8_t low = read_next_byte(binary);
        *consumed += 1;
        return calc_effective_disp(0, low, true);
    }

    if (mod == MEM_16BIT_DISP || (mod == MEM_NO_DISP && rm == DIRECT_ADDRESS_RM))
    {
        uint8_t low = read_next_byte(binary);
        uint8_t high = read_next_byte(binary);
        *consumed += 2;
        return calc_effective_disp(high, low, false);
    }

    return 0;
}

// Reads a one- or two-byte immediate (low byte first) without sign extension.
static uint16_t read_unsigned_immediate(binary_data *binary, bool two_bytes, int *consumed)
{
    uint16_t value = read_next_byte(binary);
    *consumed += 1;

    if (two_bytes)
    {
        value = (uint16_t)(value | ((uint16_t)read_next_byte(binary) << 8));
        *consumed += 1;
    }
    return value;
}

// Reads a one- or two-byte immediate; a single byte is sign-extended.
static int16_t read_signed_immediate(binary_data *binary, bool two_bytes, int *consumed)
{
    uint16_t raw = read_unsigned_immediate(binary, two_bytes, consumed);

    if (two_bytes)
    {
        return calc_effective_disp((uint8_t)(raw >> 8), (uint8_t)(raw & 0xFF), false);
    }
    return calc_effective_disp(0, (uint8_t)raw, true);
}

// Renders the memory operand selected by MOD/R/M into `dst`, which must hold
// EA_STRING_CAP bytes. Direct addresses print as an unsigned "[N]"; all other
// forms go through fill_ea_string.
static void format_memory_operand(char *dst, uint8_t mod, uint8_t rm, int16_t disp)
{
    if (mod == MEM_NO_DISP && rm == DIRECT_ADDRESS_RM)
    {
        snprintf(dst, EA_STRING_CAP, "[%u]", (unsigned)(uint16_t)disp);
        return;
    }

    output_string operand = { dst, 0 };
    fill_ea_string(&operand, EA_ENCODING_TXT[rm], disp);
}

// Shared decoder for the "register/memory with register" layout
// (opcode bits .. d w, followed by MOD REG R/M and an optional displacement).
// Prints "<mnemonic> dst, src" and returns the number of bytes read after
// byte2. When D is set the REG field names the destination.
static int decode_regm_reg(const char *mnemonic, uint8_t byte1, uint8_t byte2, binary_data *binary)
{
    const uint8_t wide_mask = 0b00000001;
    const uint8_t dest_mask = 0b00000010;

    bool reg_is_dest = (byte1 & dest_mask) != 0;
    uint8_t wide_offset = (uint8_t)((byte1 & wide_mask) * 8);
    uint8_t mod = (uint8_t)(byte2 >> 6);
    uint8_t reg = (uint8_t)((byte2 >> 3) & 0b111);
    uint8_t rm = (uint8_t)(byte2 & 0b111);

    const char *reg_text = REG_ENCODING_TXT[reg + wide_offset];
    char rm_text[EA_STRING_CAP];
    int consumed = 0;

    if (mod == REG_NO_DISP)
    {
        // Register-to-register: R/M is a register too and follows the W bit.
        snprintf(rm_text, sizeof rm_text, "%s", REG_ENCODING_TXT[rm + wide_offset]);
    }
    else
    {
        int16_t disp = read_displacement(binary, mod, rm, &consumed);
        format_memory_operand(rm_text, mod, rm, disp);
    }

    emit("%s %s, %s\n", mnemonic,
         reg_is_dest ? reg_text : rm_text,
         reg_is_dest ? rm_text : reg_text);
    return consumed;
}

// mov between the accumulator and a directly addressed memory location.
//
// byte1:     opcode byte; bit 0 (W) selects al or ax.
// byte2:     low byte of the address.
// binary:    input stream, NULL for the global `bin`.
// bytes_read: unused.
// is_M_T_A:  true for memory-to-accumulator, false for the reverse.
// Returns the number of bytes read after byte2. The address is always 16 bits
// wide, independent of W, so this is always 1.
int MOV_MA_T_AM_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read, bool is_M_T_A)
{
    (void)bytes_read;
    const uint8_t wide_mask = 0b00000001;
    int extra_bytes_read = 0;

    uint8_t high_addr = read_next_byte(binary);
    extra_bytes_read++;
    uint16_t word_addr = (uint16_t)(byte2 | ((uint16_t)high_addr << 8));

    const char *accumulator = REG_ENCODING_TXT[(byte1 & wide_mask) * 8];

    char ea_string[EA_STRING_CAP];
    snprintf(ea_string, sizeof ea_string, "[%u]", (unsigned)word_addr);

    emit("mov %s, %s\n",
         is_M_T_A ? accumulator : ea_string,
         is_M_T_A ? ea_string : accumulator);
    return extra_bytes_read;
}

// mov immediate to register (1011 w reg).
//
// byte1: opcode byte carrying W (bit 3) and the register code (bits 0..2).
// byte2: low byte of the immediate; the high byte is read when W is set.
// Returns the number of bytes read after byte2.
int MOV_I_T_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    (void)bytes_read;
    const uint8_t wide_mask = 0b00001000;
    const uint8_t reg_mask  = 0b00000111;

    bool is_wide = (byte1 & wide_mask) != 0;
    int extra_bytes_read = 0;
    uint16_t full_im = byte2;

    if (is_wide)
    {
        uint8_t high_im = read_next_byte(binary);
        extra_bytes_read++;
        full_im = (uint16_t)(full_im | ((uint16_t)high_im << 8));
    }

    uint8_t reg_value = (uint8_t)((byte1 & reg_mask) + (is_wide ? 8 : 0));

    emit("mov %s, %d\n", REG_ENCODING_TXT[reg_value], full_im);
    return extra_bytes_read;
}

// mov immediate to register/memory (1100011 w, then MOD 000 R/M).
//
// Reads the displacement (if any) followed by a one- or two-byte immediate,
// which is printed unsigned with an explicit "byte"/"word" size. With
// MOD == 11 the destination is a register and no size keyword is needed.
// Returns the number of bytes read after byte2.
int MOV_I_T_RM_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    (void)bytes_read;
    const uint8_t wide_mask = 0b00000001;
    const uint8_t mod_mask  = 0b11000000;
    const uint8_t regm_mask = 0b00000111;

    bool is_wide = (byte1 & wide_mask) != 0;
    uint8_t inst_mod = (uint8_t)((byte2 & mod_mask) >> 6);
    uint8_t ea_table_value = (uint8_t)(byte2 & regm_mask);
    int extra_bytes_read = 0;

    if (inst_mod == REG_NO_DISP)
    {
        uint16_t reg_data = read_unsigned_immediate(binary, is_wide, &extra_bytes_read);
        emit("mov %s, %d\n", REG_ENCODING_TXT[ea_table_value + (is_wide ? 8 : 0)], reg_data);
        return extra_bytes_read;
    }

    // Displacement bytes precede the immediate in the instruction stream.
    int16_t disp = read_displacement(binary, inst_mod, ea_table_value, &extra_bytes_read);
    uint16_t wide_data = read_unsigned_immediate(binary, is_wide, &extra_bytes_read);

    char ea_string[EA_STRING_CAP];
    format_memory_operand(ea_string, inst_mod, ea_table_value, disp);

    emit("mov %s, %s%d\n", ea_string, WORD_SIGNAL_TXT[is_wide], wide_data);
    return extra_bytes_read;
}

// mov register/memory to/from register (100010 d w, then MOD REG R/M).
// Returns the number of bytes read after byte2.
int MOV_RM_TF_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    (void)bytes_read;
    return decode_regm_reg("mov", byte1, byte2, binary);
}

// add/sub/cmp immediate to accumulator (00 op 10 w).
//
// byte1: opcode byte; bits 3..5 select the operation, bit 0 is W.
// byte2: low byte of the immediate; sign-extended when W is clear, otherwise
//        combined with one more byte read from the stream.
// Returns the number of bytes read after byte2.
int ASC_I_A_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    (void)bytes_read;
    const uint8_t wide_mask = 0b00000001;
    const uint8_t op_mask   = 0b00111000;

    bool is_wide = (byte1 & wide_mask) != 0;
    uint8_t a_op = (uint8_t)((byte1 & op_mask) >> 3);
    int extra_bytes_read = 0;
    int16_t effective_imm;

    if (is_wide)
    {
        uint8_t high_imm = read_next_byte(binary);
        extra_bytes_read++;
        effective_imm = calc_effective_disp(high_imm, byte2, false);
    }
    else
    {
        effective_imm = calc_effective_disp(0, byte2, true);
    }

    emit("%s %s, %d\n", ARITHMETIC_OP_TXT[a_op],
         REG_ENCODING_TXT[is_wide ? 8 : 0], effective_imm);
    return extra_bytes_read;
}

// add/sub/cmp immediate to register/memory (100000 s w, then MOD op R/M).
//
// The operation comes from bits 3..5 of byte2. The immediate is two bytes
// only when W is set and S is clear; otherwise a single byte is sign-extended.
// Memory destinations are printed with an explicit "byte"/"word" size.
// Returns the number of bytes read after byte2.
int ASC_I_RM_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    (void)bytes_read;
    const uint8_t wide_mask = 0b00000001;
    const uint8_t sign_mask = 0b00000010;
    const uint8_t mod_mask  = 0b11000000;
    const uint8_t regm_mask = 0b00000111;
    const uint8_t op_mask   = 0b00111000;

    bool is_wide = (byte1 & wide_mask) != 0;
    bool is_sign = (byte1 & sign_mask) != 0;
    bool two_byte_data = is_wide && !is_sign;

    uint8_t a_op = (uint8_t)((byte2 & op_mask) >> 3);
    uint8_t inst_mod = (uint8_t)((byte2 & mod_mask) >> 6);
    uint8_t ea_table_value = (uint8_t)(byte2 & regm_mask);
    int extra_bytes_read = 0;

    if (inst_mod == REG_NO_DISP)
    {
        int16_t reg_data = read_signed_immediate(binary, two_byte_data, &extra_bytes_read);
        emit("%s %s, %d\n", ARITHMETIC_OP_TXT[a_op],
             REG_ENCODING_TXT[ea_table_value + (is_wide ? 8 : 0)], reg_data);
        return extra_bytes_read;
    }

    // Displacement bytes precede the immediate in the instruction stream.
    int16_t effective_disp = read_displacement(binary, inst_mod, ea_table_value, &extra_bytes_read);
    int16_t effective_data = read_signed_immediate(binary, two_byte_data, &extra_bytes_read);

    char ea_string[EA_STRING_CAP];
    format_memory_operand(ea_string, inst_mod, ea_table_value, effective_disp);

    emit("%s %s%s, %d\n", ARITHMETIC_OP_TXT[a_op],
         WORD_SIGNAL_TXT[is_wide], ea_string, effective_data);
    return extra_bytes_read;
}

// add/sub/cmp register/memory with register (00 op 0 d w, then MOD REG R/M).
// The operation comes from bits 3..5 of byte1.
// Returns the number of bytes read after byte2.
int ASC_REGM_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    (void)bytes_read;
    const uint8_t op_mask = 0b00111000;
    uint8_t a_op = (uint8_t)((byte1 & op_mask) >> 3);

    return decode_regm_reg(ARITHMETIC_OP_TXT[a_op], byte1, byte2, binary);
}

// Conditional jump or loop instruction with an 8-bit signed displacement.
//
// byte1: opcode byte; its low bits select the mnemonic (2 bits for LOOPX,
//        4 bits for JX).
// byte2: displacement, relative to the end of the two-byte instruction.
// op:    which family byte1 belongs to.
//
// NASM's `$` is the address of the start of the instruction, so the target is
// always written as "$+2" followed by the signed displacement. A zero
// displacement therefore prints "$+2+0", which assembles back to 0.
void JMP_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read, JMP_OPC op)
{
    (void)binary;
    (void)bytes_read;
    const uint8_t loopx_mask = 0b00000011;
    const uint8_t jx_mask    = 0b00001111;

    int8_t disp = 0;
    memcpy(&disp, &byte2, sizeof disp);

    const char *mnemonic = (op == LOOPX)
        ? LOOPX_INST_TXT[byte1 & loopx_mask]
        : JX_INST_TXT[byte1 & jx_mask];

    emit("%s $+2%+d \n", mnemonic, disp);
}

// Instruction families main() can dispatch to.
typedef enum
{
    INST_UNKNOWN,
    INST_ASC_I_A,
    INST_LOOPX,
    INST_ASC_REGM_R,
    INST_ASC_I_RM,
    INST_JX,
    INST_MOV_M_T_A,
    INST_MOV_A_T_M,
    INST_MOV_I_T_RM,
    INST_MOV_RM_TF_R,
    INST_MOV_I_T_R
} inst_kind;

// Maps a first instruction byte to its family. The order of the checks
// matters: arithmetic and jump patterns are tested before the mov patterns.
static inst_kind classify_opcode(uint8_t inst)
{
    uint8_t top7 = (uint8_t)(inst >> 1);
    uint8_t top6 = (uint8_t)(inst >> 2);
    uint8_t top4 = (uint8_t)(inst >> 4);

    if ((top7 & ASC_I_A_M) == ASC_I_A)       return INST_ASC_I_A;
    if (top6 == LOOPX)                       return INST_LOOPX;
    if ((top6 & ASC_REGM_R_M) == ASC_REGM_R) return INST_ASC_REGM_R;
    if (top6 == ASC_I_RM)                    return INST_ASC_I_RM;
    if (top4 == JX)                          return INST_JX;
    if (top7 == MOV_M_T_A)                   return INST_MOV_M_T_A;
    if (top7 == MOV_A_T_M)                   return INST_MOV_A_T_M;
    if (top7 == MOV_I_T_RM)                  return INST_MOV_I_T_RM;
    if (top6 == MOV_RM_TF_R)                 return INST_MOV_RM_TF_R;
    if (top4 == MOV_I_T_R)                   return INST_MOV_I_T_R;
    return INST_UNKNOWN;
}

// Usage: <program> <binary-file>
//
// Exit codes: 0 success, -1 wrong argument count, -2 unsupported byte width,
// -3 input file cannot be opened or measured, -4 "output.asm" cannot be
// created, -5 "output.asm" could not be flushed on close.
int main(int argc, char** argv)
{
    if (argc != 2)
        return -1;

    if (CHAR_BIT != 8)
        return -2;

    bin.binary = fopen(argv[1], "rb");
    if (bin.binary == NULL)
    {
        perror(argv[1]);
        return -3;
    }

    long file_size = -1;
    if (fseek(bin.binary, 0, SEEK_END) == 0)
    {
        file_size = ftell(bin.binary);
    }
    if (file_size < 0 || fseek(bin.binary, 0, SEEK_SET) != 0)
    {
        perror(argv[1]);
        fclose(bin.binary);
        return -3;
    }
    bin.size = (uint64_t)file_size;

    output = fopen("output.asm", "w");
    if (output == NULL)
    {
        perror("output.asm");
        fclose(bin.binary);
        return -4;
    }
    fprintf(output, "%s\n\n", "bits 16");

    for (uint64_t bytes_read = 0; bytes_read < bin.size; bytes_read++)
    {
        uint8_t inst = read_next_byte(&bin);
        inst_kind kind = classify_opcode(inst);

        // Bytes that match no known pattern are skipped one at a time.
        if (kind == INST_UNKNOWN)
            continue;

        // Every decoded instruction needs a second byte; stop on truncation.
        if (bytes_read + 1 >= bin.size)
            break;

        uint8_t inst_byte2 = read_next_byte(&bin);
        bytes_read++;

        int extra = 0;
        switch (kind)
        {
            case INST_ASC_I_A:
                extra = ASC_I_A_parse(inst, inst_byte2, &bin, 0);
                break;
            case INST_LOOPX:
                JMP_parse(inst, inst_byte2, &bin, 0, LOOPX);
                break;
            case INST_ASC_REGM_R:
                extra = ASC_REGM_R_parse(inst, inst_byte2, &bin, 0);
                break;
            case INST_ASC_I_RM:
                extra = ASC_I_RM_parse(inst, inst_byte2, &bin, 0);
                break;
            case INST_JX:
                JMP_parse(inst, inst_byte2, &bin, 0, JX);
                break;
            case INST_MOV_M_T_A:
                extra = MOV_MA_T_AM_parse(inst, inst_byte2, &bin, 0, true);
                break;
            case INST_MOV_A_T_M:
                extra = MOV_MA_T_AM_parse(inst, inst_byte2, &bin, 0, false);
                break;
            case INST_MOV_I_T_RM:
                extra = MOV_I_T_RM_parse(inst, inst_byte2, &bin, 0);
                break;
            case INST_MOV_RM_TF_R:
                extra = MOV_RM_TF_R_parse(inst, inst_byte2, &bin, 0);
                break;
            case INST_MOV_I_T_R:
                extra = MOV_I_T_R_parse(inst, inst_byte2, &bin, 0);
                break;
            default:
                break;
        }

        // The loop increment accounts for the opcode byte itself.
        bytes_read += (uint64_t)extra;
    }

    int status = 0;
    if (fclose(output) != 0)
    {
        perror("output.asm");
        status = -5;
    }
    output = NULL;

    fclose(bin.binary);
    bin.binary = NULL;

    return status;
}