#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Opcode bit patterns of the 8086 MOV family. Each value is the fixed,
 * leading part of the first instruction byte, right-aligned; the trailing
 * comment shows the full byte layout (d = direction, w = wide, r = reg).
 */
enum DT_INSTRUCTIONS
{
    MOV_RM_TF_R = 0b100010,   // 1000 10dw  register/memory to/from register
    MOV_I_T_RM = 0b1100011,   // 1100 011w  immediate to register/memory
    MOV_I_T_R = 0b1011,       // 1011 wrrr  immediate to register
    MOV_M_T_A = 0b1010000,    // 1010 000w  memory to accumulator
    MOV_A_T_M = 0b1010001,    // 1010 001w  accumulator to memory
    MOV_RM_T_SR = 0b10001110, // 1000 1110  register/memory to segment register
    MOV_SR_T_RM = 0b10001100, // 1000 1100  segment register to register/memory
};

/* Values of the 2-bit MOD field found in the second instruction byte. */
enum MOV_REGISTER_MODE
{
    MEM_NO_DISP = 0b00,    // memory operand, no displacement (except R/M = 110)
    MEM_8BIT_DISP = 0b01,  // memory operand, 8-bit displacement follows
    MEM_16BIT_DISP = 0b10, // memory operand, 16-bit displacement follows
    REG_NO_DISP = 0b11     // register operand
};

/*
 * Register names indexed by (w << 3) | reg:
 *   index 0..7  (w = 0): al cl dl bl ah ch dh bh   (reg = 000..111)
 *   index 8..15 (w = 1): ax cx dx bx sp bp si di   (reg = 000..111)
 */
#define REG_ENCODING_TXT_LEN 3
char REG_ENCODING_TXT[16][REG_ENCODING_TXT_LEN] = {
    "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di"};

/* Effective-address base expressions indexed by the 3-bit R/M field. */
#define EA_ENCODING_TXT_LEN 8
char EA_ENCODING_TXT[8][EA_ENCODING_TXT_LEN] = {
    "bx + si", "bx + di", "bp + si", "bp + di", "si", "di", "bp", "bx"};

enum
{
    // Longest operand text is "[bp + di + 65535]": 17 characters plus NUL.
    EA_STRING_LEN = 18,
    // With MOD = 00 this R/M value means "16-bit direct address", not [bp].
    DIRECT_ADDRESS_RM = 0b110
};

/* Input stream being disassembled together with its size in bytes. */
typedef struct
{
    FILE *binary;
    uint64_t size;
} binary_data;

FILE *output;
binary_data bin;

/* Set once the input is found to end in the middle of an instruction. */
static bool input_truncated = false;

/* Reads the next byte of the global input stream; false on EOF or error. */
static bool read_next_byte(uint8_t *out)
{
    unsigned char raw;

    if (bin.binary == NULL || fread(&raw, sizeof raw, 1, bin.binary) != 1)
    {
        return false;
    }
    *out = raw;
    return true;
}

/* Records and reports that an instruction was cut short by end of input. */
static void report_truncated(void)
{
    input_truncated = true;
    fprintf(stderr, "error: input ends in the middle of an instruction\n");
}

/* Writes one "mov dest, src" line to stdout and to the output listing. */
static void emit_mov(const char *dest, const char *src)
{
    printf("%s %s, %s\n", "mov", dest, src);
    if (output != NULL)
    {
        fprintf(output, "%s %s, %s\n", "mov", dest, src);
    }
}

/* Reads a little-endian 16-bit value; *consumed counts the bytes taken. */
static bool read_word(uint16_t *out, uint8_t *consumed)
{
    uint8_t low = 0;
    uint8_t high = 0;

    if (!read_next_byte(&low))
    {
        return false;
    }
    (*consumed)++;
    if (!read_next_byte(&high))
    {
        return false;
    }
    (*consumed)++;
    *out = (uint16_t)(low | (high << 8));
    return true;
}

/*
 * Decodes "mov acc, [addr]" / "mov [addr], acc" (1010 00dw).
 *
 * byte1      first instruction byte (opcode and W bit)
 * byte2      second instruction byte (low half of the address)
 * binary     unused; bytes are read from the global `bin`
 * bytes_read unused
 * is_M_T_A   true for memory-to-accumulator, false for the reverse
 *
 * Returns the number of additional bytes consumed from the input.
 *
 * The address is always 16 bits wide; the W bit only selects al versus ax.
 */
int MOV_MA_T_AM_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read, bool is_M_T_A)
{
    const uint8_t wide_mask = 0b00000001;
    uint8_t high_addr = 0;
    char ea_string[EA_STRING_LEN] = {'\0'};

    (void)binary;
    (void)bytes_read;

    if (!read_next_byte(&high_addr))
    {
        report_truncated();
        return 0;
    }

    const uint16_t word_addr = (uint16_t)(byte2 | (high_addr << 8));
    // Index 0 is "al", index 8 is "ax".
    const char *accumulator = REG_ENCODING_TXT[(byte1 & wide_mask) * 8];

    snprintf(ea_string, sizeof ea_string, "[%u]", (unsigned)word_addr);

    if (is_M_T_A)
    {
        emit_mov(accumulator, ea_string);
    }
    else
    {
        emit_mov(ea_string, accumulator);
    }
    return 1;
}

/*
 * Decodes "mov reg, immediate" (1011 wrrr).
 *
 * byte1      first instruction byte (opcode, W bit and register)
 * byte2      second instruction byte (low half of the immediate)
 * binary     unused; bytes are read from the global `bin`
 * bytes_read unused
 *
 * Returns the number of additional bytes consumed from the input.
 */
int MOV_I_T_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    const uint8_t wide_mask = 0b00001000;
    const uint8_t reg_mask = 0b00000111;
    const bool is_wide = (byte1 & wide_mask) != 0;
    uint8_t extra_bytes_read = 0;
    uint16_t full_im = byte2;
    char immediate[8] = {'\0'};

    (void)binary;
    (void)bytes_read;

    // A wide move carries a second immediate byte holding the high half.
    if (is_wide)
    {
        uint8_t high_im = 0;

        if (!read_next_byte(&high_im))
        {
            report_truncated();
            return extra_bytes_read;
        }
        extra_bytes_read++;
        full_im = (uint16_t)(full_im | (high_im << 8));
    }

    const uint8_t reg_value = (uint8_t)((byte1 & reg_mask) + (is_wide ? 8 : 0));

    snprintf(immediate, sizeof immediate, "%u", (unsigned)full_im);
    emit_mov(REG_ENCODING_TXT[reg_value], immediate);
    return extra_bytes_read;
}

/*
 * Decodes "mov reg/mem, reg" / "mov reg, reg/mem" (1000 10dw).
 *
 * byte1      first instruction byte (opcode, D and W bits)
 * byte2      second instruction byte (MOD, REG and R/M fields)
 * binary     unused; bytes are read from the global `bin`
 * bytes_read unused
 *
 * Returns the number of additional (displacement) bytes consumed.
 */
int MOV_RM_TF_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
    const uint8_t wide_mask = 0b00000001;
    const uint8_t dest_mask = 0b00000010;
    const uint8_t mod_mask = 0b11000000;
    const uint8_t reg_mask = 0b00111000;
    const uint8_t rm_mask = 0b00000111;

    const bool reg_is_dest = (byte1 & dest_mask) != 0;
    const uint8_t wide_offset = (uint8_t)((byte1 & wide_mask) * 8);
    const uint8_t inst_mod = (uint8_t)((byte2 & mod_mask) >> 6);
    const uint8_t rm_value = (uint8_t)(byte2 & rm_mask);
    const char *reg_name = REG_ENCODING_TXT[((byte2 & reg_mask) >> 3) + wide_offset];

    uint8_t extra_bytes_read = 0;
    uint16_t wide_disp = 0;
    // Text of the R/M operand: a register name or a bracketed address.
    char rm_string[EA_STRING_LEN] = {'\0'};

    (void)binary;
    (void)bytes_read;

    switch (inst_mod)
    {
    case REG_NO_DISP:
        // Register to register: the W bit applies to both operands.
        snprintf(rm_string, sizeof rm_string, "%s", REG_ENCODING_TXT[rm_value + wide_offset]);
        break;

    case MEM_8BIT_DISP:
    {
        uint8_t low_disp = 0;

        if (!read_next_byte(&low_disp))
        {
            report_truncated();
            return extra_bytes_read;
        }
        extra_bytes_read++;

        // The CPU sign-extends an 8-bit displacement, so 0xFF means -1.
        const int disp = (low_disp >= 0x80) ? (int)low_disp - 0x100 : (int)low_disp;

        snprintf(rm_string, sizeof rm_string, "[%s %c %d]", EA_ENCODING_TXT[rm_value],
                 (disp < 0) ? '-' : '+', (disp < 0) ? -disp : disp);
        break;
    }

    case MEM_16BIT_DISP:
        if (!read_word(&wide_disp, &extra_bytes_read))
        {
            report_truncated();
            return extra_bytes_read;
        }
        // Address arithmetic wraps at 16 bits, so the unsigned form is exact.
        snprintf(rm_string, sizeof rm_string, "[%s + %u]", EA_ENCODING_TXT[rm_value],
                 (unsigned)wide_disp);
        break;

    case MEM_NO_DISP:
    default:
        if (rm_value == DIRECT_ADDRESS_RM)
        {
            // Special case: MOD = 00 with R/M = 110 is a 16-bit direct address.
            if (!read_word(&wide_disp, &extra_bytes_read))
            {
                report_truncated();
                return extra_bytes_read;
            }
            snprintf(rm_string, sizeof rm_string, "[%u]", (unsigned)wide_disp);
        }
        else
        {
            snprintf(rm_string, sizeof rm_string, "[%s]", EA_ENCODING_TXT[rm_value]);
        }
        break;
    }

    // D = 1: the REG field names the destination; D = 0: it names the source.
    if (reg_is_dest)
    {
        emit_mov(reg_name, rm_string);
    }
    else
    {
        emit_mov(rm_string, reg_name);
    }
    return extra_bytes_read;
}

/*
 * Disassembles the 8086 binary named by argv[1], printing each recognised
 * MOV to stdout and writing the same listing to "output.asm".
 *
 * Exit status: 0 on success, -1 wrong argument count, -2 unsupported byte
 * width, -3 input cannot be opened, -4 input size cannot be determined,
 * -5 output cannot be created, -6 truncated input or failed output write.
 *
 * Bytes that do not start a supported MOV encoding are skipped.
 */
int main(int argc, char **argv)
{
    if (argc != 2)
    {
        return -1;
    }
    if (CHAR_BIT != 8)
    {
        return -2;
    }

    bin.binary = fopen(argv[1], "rb");
    if (bin.binary == NULL)
    {
        perror(argv[1]);
        return -3;
    }

    long file_len = -1;
    if (fseek(bin.binary, 0, SEEK_END) == 0)
    {
        file_len = ftell(bin.binary);
    }
    if (file_len < 0 || fseek(bin.binary, 0, SEEK_SET) != 0)
    {
        perror(argv[1]);
        fclose(bin.binary);
        return -4;
    }
    bin.size = (uint64_t)file_len;

    output = fopen("output.asm", "w");
    if (output == NULL)
    {
        perror("output.asm");
        fclose(bin.binary);
        return -5;
    }

    fprintf(output, "%s\n\n", "bits 16");

    uint64_t bytes_read = 0;
    while (bytes_read < bin.size && !input_truncated)
    {
        uint8_t inst = 0;
        uint8_t inst_byte2 = 0;

        if (!read_next_byte(&inst))
        {
            break;
        }
        bytes_read++;

        // Opcodes have different lengths, so compare progressively shorter
        // prefixes of the first byte. Segment-register and
        // immediate-to-memory forms are not decoded.
        const uint8_t prefix7 = (uint8_t)(inst >> 1);
        const uint8_t prefix6 = (uint8_t)(inst >> 2);
        const uint8_t prefix4 = (uint8_t)(inst >> 4);

        const bool is_acc_move = (prefix7 == MOV_M_T_A || prefix7 == MOV_A_T_M);
        const bool is_rm_move = (prefix6 == MOV_RM_TF_R);
        const bool is_imm_move = (prefix4 == MOV_I_T_R);

        if (!is_acc_move && !is_rm_move && !is_imm_move)
        {
            continue;
        }

        // Every supported encoding has at least a second byte.
        if (!read_next_byte(&inst_byte2))
        {
            report_truncated();
            break;
        }
        bytes_read++;

        int extra = 0;
        if (is_acc_move)
        {
            extra = MOV_MA_T_AM_parse(inst, inst_byte2, NULL, 0, (prefix7 == MOV_M_T_A));
        }
        else if (is_rm_move)
        {
            extra = MOV_RM_TF_R_parse(inst, inst_byte2, NULL, 0);
        }
        else
        {
            extra = MOV_I_T_R_parse(inst, inst_byte2, NULL, 0);
        }
        bytes_read += (uint64_t)extra;
    }

    int status = input_truncated ? -6 : 0;

    // fclose flushes the listing; a failure here means it is incomplete.
    if (fclose(output) != 0)
    {
        perror("output.asm");
        status = -6;
    }
    output = NULL;

    fclose(bin.binary);
    bin.binary = NULL;

    return status;
}