diff --git a/1-1/compile.sh b/1-1/compile.sh
index e3ecbb0..fff3fa0 100755
--- a/1-1/compile.sh
+++ b/1-1/compile.sh
@@ -1,3 +1,4 @@
 #! /usr/bin/env bash
+SCRIPT_LOC=$( dirname -- "$( readlink -f -- "$0" )" )
 
-gcc 1-1/decoder.c -O0 -g -gdwarf -o 1-1/8086coded
+gcc "$SCRIPT_LOC/decoder.c" -O0 -g -gdwarf -o "$SCRIPT_LOC/8086coded"
diff --git a/1-1/nasm-compile.sh b/1-1/nasm-compile.sh
index 4dcedb9..596c0c4 100644
--- a/1-1/nasm-compile.sh
+++ b/1-1/nasm-compile.sh
@@ -1,4 +1,5 @@
-#! /usr/env/bin bash
+#! /usr/bin/env bash
+SCRIPT_LOC=$( dirname -- "$( readlink -f -- "$0" )" )
 
-nasm -f bin 1-1/listing-38.asm -o 1-1/listing-37
-nasm -f bin 1-1/listing-38.asm -o 1-1/listing-38
+nasm -f bin "$SCRIPT_LOC/listing-37.asm" -o "$SCRIPT_LOC/listing-37"
+nasm -f bin "$SCRIPT_LOC/listing-38.asm" -o "$SCRIPT_LOC/listing-38"
diff --git a/1-2/compile.sh b/1-2/compile.sh
index 04ac9ca..fff3fa0 100755
--- a/1-2/compile.sh
+++ b/1-2/compile.sh
@@ -1,3 +1,4 @@
 #! /usr/bin/env bash
+SCRIPT_LOC=$( dirname -- "$( readlink -f -- "$0" )" )
 
-gcc 1-2/decoder.c -O0 -g -gdwarf -o 1-2/8086coded
+gcc "$SCRIPT_LOC/decoder.c" -O0 -g -gdwarf -o "$SCRIPT_LOC/8086coded"
diff --git a/1-2/listing-40 b/1-2/listing-40
index 13c1744..3610804 100644
Binary files a/1-2/listing-40 and b/1-2/listing-40 differ
diff --git a/1-2/nasm-compile.sh b/1-2/nasm-compile.sh
index a4c0a5a..596c0c4 100755
--- a/1-2/nasm-compile.sh
+++ b/1-2/nasm-compile.sh
@@ -1,4 +1,5 @@
 #!
/usr/bin/env bash
+SCRIPT_LOC=$( dirname -- "$( readlink -f -- "$0" )" )
 
-nasm -f bin 1-2/listing-39.asm -o 1-2/listing-39
-nasm -f bin 1-2/listing-40.asm -o 1-2/listing-40
+nasm -f bin "$SCRIPT_LOC/listing-39.asm" -o "$SCRIPT_LOC/listing-39"
+nasm -f bin "$SCRIPT_LOC/listing-40.asm" -o "$SCRIPT_LOC/listing-40"
diff --git a/1-3/8086coded b/1-3/8086coded
new file mode 100755
index 0000000..7bb3890
Binary files /dev/null and b/1-3/8086coded differ
diff --git a/1-3/compile.sh b/1-3/compile.sh
new file mode 100755
index 0000000..fff3fa0
--- /dev/null
+++ b/1-3/compile.sh
@@ -0,0 +1,4 @@
+#! /usr/bin/env bash
+SCRIPT_LOC=$( dirname -- "$( readlink -f -- "$0" )" )
+
+gcc "$SCRIPT_LOC/decoder.c" -O0 -g -gdwarf -o "$SCRIPT_LOC/8086coded"
diff --git a/1-3/decoder.c b/1-3/decoder.c
new file mode 100644
index 0000000..ffcb8d8
--- /dev/null
+++ b/1-3/decoder.c
@@ -0,0 +1,925 @@
+//NOTE(review): the header names were missing from the #include lines; the list below is
+//reconstructed from what the code uses (CHAR_BIT, bool, uintN_t, FILE/printf, calloc, memcpy/strlen).
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+//TODO: Pass file struct and currently read bytes via param
+
+/* typedef enum ARITHMETIC_OP */
+/* { */
+/*   ADD = 0b000, */
+/*   ADC = 0b010, */
+/*   SUB_NEG_IMUL = 0b101, */
+/*   SBB = 0b011, */
+/*   MUL = 0b100, */
+/*   DEC = 0b001, */
+/*   CMP = 0b111, */
+/* } ARITHMETIC_OP; */
+
+//Three-bit operation selector of the arithmetic instructions handled so far
+typedef enum ARITHMETIC_OP
+{
+  ADD = 0b000,
+  CMP = 0b111,
+  SUB = 0b101
+} ARITHMETIC_OP;
+
+//Mnemonic for each operation selector; selectors without an entry stay as empty strings
+#define ARITHMETIC_OP_TXT_LEN 4
+char ARITHMETIC_OP_TXT[0b1000][ARITHMETIC_OP_TXT_LEN] = { [0b000] = "add", [0b111] = "cmp", [0b101] = "sub" };
+
+//Masks that blank out the operation selector before an opcode is compared against ASC_INST
+typedef enum ASC_INST_MASK
+{
+  ASC_REGM_R_M = 0b1100'01 , //d w
+  // ASC_I_RM = 0b1111'11, //s w
+  ASC_I_A_M = 0b1100'011 //w
+} ARITHMETIC_INST_MASK;
+
+//Opcode prefixes of the add/sub/cmp encodings (trailing comment names the flag bits that follow)
+typedef enum ASC_INST
+{
+  ASC_REGM_R = 0b0, //d w
+  ASC_I_RM = 0b1000'00, //s w
+  ASC_I_A = 0b0000'010 //w
+} ARITHMETIC_INST;
+
+//Opcode prefixes of the mov encodings (trailing comment names the flag bits that follow)
+typedef enum DT_INSTRUCTIONS
+{
+  MOV_RM_TF_R = 0b1000'10, //d w
+  MOV_I_T_RM = 0b1100'011, // w
+  MOV_I_T_R = 0b1011, // w reg
+  MOV_M_T_A = 0b1010'000, // w
+  MOV_A_T_M = 0b1010'001, // w
+  MOV_RM_T_SR = 0b1000'1110,
+  MOV_SR_T_RM = 0b1000'1100,
+} DT_INSTRUCTIONS;
+
+//Values of the two-bit MOD field held in the top bits of the second instruction byte
+enum REGISTER_MOD
+  {
+    MEM_NO_DISP = 0b00,
+    MEM_8BIT_DISP = 0b01,
+    MEM_16BIT_DISP = 0b10,
+    REG_NO_DISP = 0b11
+  };
+
+//Register names indexed by reg + 8 * W: entries 0-7 are the byte registers, 8-15 the word registers
+#define REG_ENCODING_TXT_LEN 3
+char REG_ENCODING_TXT[0b10000][REG_ENCODING_TXT_LEN] = { "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
+                                                         "ax", "cx", "dx", "bx", "sp", "bp", "si", "di" };
+
+//Effective-address base expressions indexed by the R/M field
+#define EA_ENCODING_TXT_LEN 8
+char EA_ENCODING_TXT[0b1000][EA_ENCODING_TXT_LEN] = { "bx + si", "bx + di", "bp + si", "bp + di",
+                                                      "si", "di", "bp", "bx" };
+
+//Operand-size keyword indexed by the W bit
+#define WORD_SIGNAL_LEN 6
+char WORD_SIGNAL_TXT[0b10][WORD_SIGNAL_LEN] = { "byte ", "word " };
+
+//Capacity every effective-address buffer must provide: "[bp + di + 65535]\0"
+#define EA_STRING_LEN 18
+//Capacity of one rendered instruction line (the longest form is well under this)
+#define ASM_LINE_LEN 64
+//R/M value that, combined with MEM_NO_DISP, selects a 16-bit direct address
+#define DIRECT_ADDRESS_RM 0b110
+
+/* enum REG_ENCODING */
+/* { */
+/*   R_AL = 0b000, */
+/*   R_CL = 0b001, */
+/*   R_DL = 0b010, */
+/*   R_BL = 0b011, */
+/*   R_AH = 0b100, */
+/*   R_CH = 0b101, */
+/*   R_DH = 0b110, */
+/*   R_BH = 0b111 */
+/* }; */
+
+/* enum WREG_ENCODING */
+/* { */
+/*   WR_AX = 0b000, */
+/*   WR_CX = 0b001, */
+/*   WR_DX = 0b010, */
+/*   WR_BX = 0b011, */
+/*   WR_SP = 0b100, */
+/*   WR_BP = 0b101, */
+/*   WR_SI = 0b110, */
+/*   WR_DI = 0b111 */
+/* }; */
+
+//Input stream being decoded together with its total size in bytes
+typedef struct
+{
+  FILE *binary;
+  uint64_t size;
+} binary_data;
+
+FILE *output;
+binary_data bin;
+
+//Caller-owned text buffer plus the number of characters written into it
+typedef struct {
+  char* string;
+  uint64_t len;
+} output_string;
+
+//Signature shared by every parser that takes only the two leading instruction bytes
+typedef int (*instruction_parser)(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read);
+
+//Writes one finished line of assembly to stdout and, when it is open, to the output listing.
+static void emit_line(const char *line)
+{
+  printf("%s\n", line);
+  if (output != NULL)
+    fprintf(output, "%s\n", line);
+}
+
+//Reads `count` bytes of the current instruction from bin.binary into `dest`.
+//Bytes that could not be read are left as zero. Returns the number of bytes actually read.
+static uint8_t read_payload(uint8_t *dest, uint8_t count)
+{
+  size_t got = 0;
+
+  memset(dest, 0, count);
+  if (bin.binary != NULL)
+    got = fread(dest, 1, count, bin.binary);
+  if (got < count)
+    fprintf(stderr, "warning: input ended in the middle of an instruction\n");
+
+  return (uint8_t)got;
+}
+
+//Reads a one- or two-byte little-endian immediate into *low / *high (*high is 0 for one byte).
+//Returns the number of bytes consumed.
+static uint8_t read_immediate(bool two_bytes, uint8_t *low, uint8_t *high)
+{
+  uint8_t raw[2] = {0};
+  uint8_t consumed = read_payload(raw, two_bytes ? 2 : 1);
+
+  *low = raw[0];
+  *high = raw[1];
+  return consumed;
+}
+
+//Combines two bytes into a signed 16-bit value.
+//When lo_only is set and bit 7 of low_order is set, the value is sign-extended from 8 bits;
+//otherwise high_order is used as given. Computed arithmetically so the result does not
+//depend on the byte order of the host.
+int16_t calc_effective_disp(uint8_t high_order, uint8_t low_order, bool lo_only)
+{
+  const uint8_t low_order_neg_mask = 0b1000'0000;
+
+  if (lo_only && (low_order & low_order_neg_mask))
+    high_order = 0b1111'1111;
+
+  int32_t combined = ((int32_t)high_order << 8) | low_order;
+  if (combined > INT16_MAX)
+    combined -= 0x10000;
+
+  return (int16_t)combined;
+}
+
+//Renders "[content]", "[content + disp]", "[content -disp]" or, for empty content, "[disp]"
+//into str->string and adds the number of characters written to str->len.
+//str->string must provide at least EA_STRING_LEN bytes; longer text is truncated.
+void fill_ea_string(output_string *str, char* content, int16_t displacement)
+{
+  bool has_base = (content != NULL && content[0] != '\0');
+  int written;
+
+  if (!has_base)
+    //No base expression: the displacement is the whole operand, so print it even when it is 0
+    written = snprintf(str->string, EA_STRING_LEN, "[%d]", displacement);
+  else if (displacement == 0)
+    written = snprintf(str->string, EA_STRING_LEN, "[%s]", content);
+  else if (displacement > 0)
+    written = snprintf(str->string, EA_STRING_LEN, "[%s + %d]", content, displacement);
+  else
+    written = snprintf(str->string, EA_STRING_LEN, "[%s %d]", content, displacement);
+
+  if (written < 0)
+    return;
+  if (written >= EA_STRING_LEN)
+    written = EA_STRING_LEN - 1;
+  str->len += (uint64_t)written;
+}
+
+//Renders a direct-address operand "[address]"; the address is printed unsigned.
+static void fill_direct_address(output_string *str, uint16_t address)
+{
+  int written = snprintf(str->string, EA_STRING_LEN, "[%u]", (unsigned)address);
+
+  if (written < 0)
+    return;
+  if (written >= EA_STRING_LEN)
+    written = EA_STRING_LEN - 1;
+  str->len += (uint64_t)written;
+}
+
+//Reads the displacement bytes that belong to a memory-mode MOD/R/M pair and renders the
+//operand text into `operand`. MEM_NO_DISP with R/M 110 is a direct address and always
+//carries two address bytes. Returns the number of bytes consumed.
+static uint8_t read_memory_operand(uint8_t mod, uint8_t rm, output_string *operand)
+{
+  uint8_t raw[2] = {0};
+  uint8_t consumed = 0;
+  int16_t displacement = 0;
+
+  if (mod == MEM_NO_DISP && rm == DIRECT_ADDRESS_RM)
+  {
+    consumed = read_payload(raw, 2);
+    fill_direct_address(operand, (uint16_t)((raw[1] << 8) | raw[0]));
+    return consumed;
+  }
+
+  if (mod == MEM_8BIT_DISP)
+  {
+    consumed = read_payload(raw, 1);
+    displacement = calc_effective_disp(0, raw[0], true);
+  }
+  else if (mod == MEM_16BIT_DISP)
+  {
+    consumed = read_payload(raw, 2);
+    displacement = calc_effective_disp(raw[1], raw[0], false);
+  }
+
+  fill_ea_string(operand, EA_ENCODING_TXT[rm], displacement);
+  return consumed;
+}
+
+//Decodes the "d w | mod reg r/m" operand layout shared by the register/memory forms of
+//mov and of the arithmetic instructions, and emits "<mnemonic> dest, src".
+//Returns the number of bytes read after byte2.
+static int decode_reg_rm(const char *mnemonic, uint8_t byte1, uint8_t byte2)
+{
+  const uint8_t wide_mask = 0b0000'0001;
+  const uint8_t dest_mask = 0b0000'0010;
+  const uint8_t mod_mask = 0b1100'0000;
+  const uint8_t reg_mask = 0b0011'1000;
+  const uint8_t rm_mask = 0b0000'0111;
+
+  char ea_buffer[EA_STRING_LEN] = {'\0'};
+  output_string mem_data = { ea_buffer, 0 };
+  char line[ASM_LINE_LEN];
+
+  bool reg_is_dest = (byte1 & dest_mask) != 0;
+  uint8_t wide_offset = (byte1 & wide_mask) * 8;
+  uint8_t inst_mod = (byte2 & mod_mask) >> 6;
+  uint8_t rm_value = byte2 & rm_mask;
+  uint8_t extra_bytes_read = 0;
+
+  const char *reg_text = REG_ENCODING_TXT[((byte2 & reg_mask) >> 3) + wide_offset];
+  const char *rm_text = ea_buffer;
+
+  if (inst_mod == REG_NO_DISP)
+    //Register to register: the R/M field names a register and is affected by W as well
+    rm_text = REG_ENCODING_TXT[rm_value + wide_offset];
+  else
+    extra_bytes_read = read_memory_operand(inst_mod, rm_value, &mem_data);
+
+  //if D=1, dest is the reg field and src the r/m operand; otherwise the other way round
+  snprintf(line, sizeof(line), "%s %s, %s", mnemonic,
+           reg_is_dest ? reg_text : rm_text,
+           reg_is_dest ? rm_text : reg_text);
+  emit_line(line);
+
+  return extra_bytes_read;
+}
+
+//mov between the accumulator and a direct memory address (1010000w / 1010001w).
+//byte2 is the low address byte; the high address byte is always read, for byte and word forms.
+//is_M_T_A selects "mov acc, [addr]", otherwise "mov [addr], acc" is emitted.
+//Returns the number of bytes read after byte2.
+int MOV_MA_T_AM_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read, bool is_M_T_A)
+{
+  const uint8_t wide_mask = 0b0000'0001;
+  uint8_t high_addr = 0;
+  char ea_buffer[EA_STRING_LEN] = {'\0'};
+  output_string ea_string = { ea_buffer, 0 };
+  char line[ASM_LINE_LEN];
+
+  (void)binary;
+  (void)bytes_read;
+
+  uint8_t extra_bytes_read = read_payload(&high_addr, 1);
+  fill_direct_address(&ea_string, (uint16_t)((high_addr << 8) | byte2));
+
+  //al for the byte form, ax for the word form
+  const char *accumulator = REG_ENCODING_TXT[(byte1 & wide_mask) * 8];
+
+  snprintf(line, sizeof(line), "mov %s, %s",
+           is_M_T_A ? accumulator : ea_buffer,
+           is_M_T_A ? ea_buffer : accumulator);
+  emit_line(line);
+
+  return extra_bytes_read;
+}
+
+//mov immediate to register (1011 w reg). byte2 is the low immediate byte; the high byte
+//follows only when W is set. Returns the number of bytes read after byte2.
+int MOV_I_T_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
+{
+  const uint8_t wide_mask = 0b0000'1000;
+  const uint8_t reg_mask = 0b0000'0111;
+  bool is_wide = (byte1 & wide_mask) != 0;
+  uint8_t high_im = 0;
+  uint8_t extra_bytes_read = 0;
+  char line[ASM_LINE_LEN];
+
+  (void)binary;
+  (void)bytes_read;
+
+  if (is_wide)
+    extra_bytes_read = read_payload(&high_im, 1);
+
+  uint16_t full_im = (uint16_t)((high_im << 8) | byte2);
+  uint8_t reg_value = (byte1 & reg_mask) + (is_wide ? 8 : 0);
+
+  snprintf(line, sizeof(line), "mov %s, %u", REG_ENCODING_TXT[reg_value], (unsigned)full_im);
+  emit_line(line);
+
+  return extra_bytes_read;
+}
+
+//mov immediate to register/memory (1100011w | mod 000 r/m | disp | data).
+//Memory destinations are emitted with an explicit byte/word keyword before the immediate;
+//a register destination (MOD 11) is emitted as a plain "mov reg, imm".
+//Returns the number of bytes read after byte2.
+int MOV_I_T_RM_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
+{
+  const uint8_t wide_mask = 0b0000'0001;
+  const uint8_t mod_mask = 0b1100'0000;
+  const uint8_t regm_mask = 0b0000'0111;
+
+  char ea_buffer[EA_STRING_LEN] = {'\0'};
+  output_string mem_data = { ea_buffer, 0 };
+  char line[ASM_LINE_LEN];
+
+  bool is_wide = (byte1 & wide_mask) != 0;
+  uint8_t inst_mod = (byte2 & mod_mask) >> 6;
+  uint8_t ea_table_value = (byte2 & regm_mask);
+  uint8_t low_data = 0;
+  uint8_t high_data = 0;
+  uint8_t extra_bytes_read = 0;
+
+  (void)binary;
+  (void)bytes_read;
+
+  //Displacement bytes (if any) come first, the immediate follows them
+  if (inst_mod != REG_NO_DISP)
+    extra_bytes_read += read_memory_operand(inst_mod, ea_table_value, &mem_data);
+  extra_bytes_read += read_immediate(is_wide, &low_data, &high_data);
+
+  unsigned wide_data = ((unsigned)high_data << 8) | low_data;
+
+  if (inst_mod == REG_NO_DISP)
+    snprintf(line, sizeof(line), "mov %s, %u", REG_ENCODING_TXT[ea_table_value + (is_wide * 8)], wide_data);
+  else
+    snprintf(line, sizeof(line), "mov %s, %s%u", ea_buffer, WORD_SIGNAL_TXT[is_wide], wide_data);
+  emit_line(line);
+
+  return extra_bytes_read;
+}
+
+//mov register/memory to/from register (100010dw | mod reg r/m | disp).
+//Returns the number of bytes read after byte2.
+int MOV_RM_TF_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
+{
+  (void)binary;
+  (void)bytes_read;
+
+  return decode_reg_rm("mov", byte1, byte2);
+}
+
+//add/sub/cmp immediate to accumulator (00ooo10w | data | data if w=1).
+//byte2 is the low immediate byte; a byte immediate is sign-extended for printing.
+//Returns the number of bytes read after byte2.
+int ASC_I_A_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
+{
+  const uint8_t wide_mask = 0b0000'0001;
+  const uint8_t op_mask = 0b0011'1000;
+  bool is_wide = (byte1 & wide_mask) != 0;
+  uint8_t high_imm = 0;
+  uint8_t extra_bytes_read = 0;
+  char line[ASM_LINE_LEN];
+
+  (void)binary;
+  (void)bytes_read;
+
+  //Arithmetic op retrieval
+  uint8_t a_op = (byte1 & op_mask) >> 3;
+
+  if (is_wide)
+    extra_bytes_read = read_payload(&high_imm, 1);
+  int16_t effective_imm = calc_effective_disp(high_imm, byte2, !is_wide);
+
+  snprintf(line, sizeof(line), "%s %s, %d", ARITHMETIC_OP_TXT[a_op], REG_ENCODING_TXT[is_wide * 8], effective_imm);
+  emit_line(line);
+
+  return extra_bytes_read;
+}
+
+//add/sub/cmp immediate to register/memory (100000sw | mod ooo r/m | disp | data).
+//The immediate is two bytes only when W=1 and S=0; otherwise it is one byte, sign-extended.
+//Returns the number of bytes read after byte2.
+int ASC_I_RM_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
+{
+  const uint8_t wide_mask = 0b0000'0001;
+  const uint8_t sign_mask = 0b0000'0010;
+  const uint8_t mod_mask = 0b1100'0000;
+  const uint8_t regm_mask = 0b0000'0111;
+  const uint8_t op_mask = 0b0011'1000;
+
+  char ea_buffer[EA_STRING_LEN] = {'\0'};
+  output_string mem_data = { ea_buffer, 0 };
+  char line[ASM_LINE_LEN];
+
+  bool is_wide = (byte1 & wide_mask) != 0;
+  bool is_sign = (byte1 & sign_mask) != 0;
+  bool has_wide_data = is_wide && !is_sign;
+  uint8_t inst_mod = (byte2 & mod_mask) >> 6;
+  uint8_t ea_table_value = (byte2 & regm_mask);
+  uint8_t low_data = 0;
+  uint8_t high_data = 0;
+  uint8_t extra_bytes_read = 0;
+
+  (void)binary;
+  (void)bytes_read;
+
+  //Arithmetic op retrieval
+  uint8_t a_op = (byte2 & op_mask) >> 3;
+
+  //Displacement bytes (if any) come first, the immediate follows them
+  if (inst_mod != REG_NO_DISP)
+    extra_bytes_read += read_memory_operand(inst_mod, ea_table_value, &mem_data);
+  extra_bytes_read += read_immediate(has_wide_data, &low_data, &high_data);
+
+  int16_t effective_data = calc_effective_disp(high_data, low_data, !has_wide_data);
+
+  if (inst_mod == REG_NO_DISP)
+    snprintf(line, sizeof(line), "%s %s, %d", ARITHMETIC_OP_TXT[a_op],
+             REG_ENCODING_TXT[ea_table_value + (is_wide * 8)], effective_data);
+  else
+    snprintf(line, sizeof(line), "%s %s%s, %d", ARITHMETIC_OP_TXT[a_op],
+             WORD_SIGNAL_TXT[is_wide], ea_buffer, effective_data);
+  emit_line(line);
+
+  return extra_bytes_read;
+}
+
+//add/sub/cmp register/memory with register (00ooo0dw | mod reg r/m | disp).
+//Returns the number of bytes read after byte2.
+int ASC_REGM_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
+{
+  const uint8_t op_mask = 0b0011'1000;
+
+  (void)binary;
+  (void)bytes_read;
+
+  uint8_t a_op = (byte1 & op_mask) >> 3;
+
+  return decode_reg_rm(ARITHMETIC_OP_TXT[a_op], byte1, byte2);
+}
+
+//Decodes the binary named by argv[1], printing the disassembly to stdout and to output.asm.
+//Returns 0 on success, -1 on wrong usage, -2 on a platform without 8-bit bytes and
+//-3 when a file cannot be opened, measured or written.
+int main(int argc, char** argv)
+{
+  if (argc != 2) return -1;
+  if (CHAR_BIT != 8) return -2;
+
+  bin.binary = fopen(argv[1], "rb");
+  if (bin.binary == NULL)
+  {
+    perror(argv[1]);
+    return -3;
+  }
+
+  long end_offset = -1;
+  if (fseek(bin.binary, 0, SEEK_END) == 0)
+    end_offset = ftell(bin.binary);
+  if (end_offset < 0 || fseek(bin.binary, 0, SEEK_SET) != 0)
+  {
+    perror(argv[1]);
+    fclose(bin.binary);
+    return -3;
+  }
+  bin.size = (uint64_t)end_offset;
+
+  output = fopen("output.asm", "w");
+  if (output == NULL)
+  {
+    perror("output.asm");
+    fclose(bin.binary);
+    return -3;
+  }
+  fprintf(output, "%s\n\n", "bits 16");
+
+  uint64_t bytes_read = 0;
+  uint8_t inst = 0;
+  while (bytes_read < bin.size && fread(&inst, 1, 1, bin.binary) == 1)
+  {
+    bytes_read++;
+
+    //The opcode prefix is 7, 6 or 4 bits long depending on the instruction
+    uint8_t top7 = inst >> 1;
+    uint8_t top6 = inst >> 2;
+    uint8_t top4 = inst >> 4;
+    instruction_parser parse = NULL;
+    bool is_acc_mov = false;
+
+    //First, we check for ASC.
+    if ((top7 & ASC_I_A_M) == ASC_I_A)
+      parse = ASC_I_A_parse;
+    else if ((top6 & ASC_REGM_R_M) == ASC_REGM_R)
+      parse = ASC_REGM_R_parse;
+    else if (top6 == ASC_I_RM)
+      parse = ASC_I_RM_parse;
+    //Then, we're checking for all MOV except segment registers
+    else if (top7 == MOV_M_T_A || top7 == MOV_A_T_M)
+      is_acc_mov = true;
+    else if (top7 == MOV_I_T_RM)
+      parse = MOV_I_T_RM_parse;
+    else if (top6 == MOV_RM_TF_R)
+      parse = MOV_RM_TF_R_parse;
+    else if (top4 == MOV_I_T_R)
+      parse = MOV_I_T_R_parse;
+    else
+    {
+      fprintf(stderr, "warning: skipping unsupported opcode byte 0x%02x\n", inst);
+      continue;
+    }
+
+    //Every supported instruction has at least a second byte
+    uint8_t inst_byte2 = 0;
+    if (bytes_read >= bin.size || fread(&inst_byte2, 1, 1, bin.binary) != 1)
+    {
+      fprintf(stderr, "warning: input ended in the middle of an instruction\n");
+      break;
+    }
+    bytes_read++;
+
+    if (is_acc_mov)
+      bytes_read += (uint64_t)MOV_MA_T_AM_parse(inst, inst_byte2, &bin, (int)bytes_read, (top7 == MOV_M_T_A));
+    else
+      bytes_read += (uint64_t)parse(inst, inst_byte2, &bin, (int)bytes_read);
+  }
+
+  int status = 0;
+  //fclose flushes the listing, so a failure here means output.asm is incomplete
+  if (fclose(output) != 0)
+  {
+    perror("output.asm");
+    status = -3;
+  }
+  output = NULL;
+  fclose(bin.binary);
+  bin.binary = NULL;
+
+  return status;
+}
diff --git
a/1-3/listing-41 b/1-3/listing-41 new file mode 100644 index 0000000..b63415c Binary files /dev/null and b/1-3/listing-41 differ diff --git a/1-3/listing-41.asm b/1-3/listing-41.asm new file mode 100644 index 0000000..ff97df4 --- /dev/null +++ b/1-3/listing-41.asm @@ -0,0 +1,121 @@ +; ======================================================================== +; +; (C) Copyright 2023 by Molly Rocket, Inc., All Rights Reserved. +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Please see https://computerenhance.com for further information +; +; ======================================================================== + +; ======================================================================== +; LISTING 41 +; ======================================================================== + +bits 16 + +;add bx, [bx+si] +;add bx, [bp] +add si, 2 +add bp, 2 +add cx, 8 +;add bx, [bp + 0] +;add cx, [bx + 2] +;add bh, [bp + si + 4] +;add di, [bp + di + 6] +;add [bx+si], bx +;add [bp], bx +;add [bp + 0], bx +;add [bx + 2], cx +;add [bp + si + 4], bh +;add [bp + di + 6], di +add byte [bx], 34 +add word [bp + si + 1000], 29 +; add ax, [bp] +; add al, [bx + si] +; add ax, bx +add al, ah +add ax, 1000 +add al, -30 +add al, 9 + +;sub bx, [bx+si] +;sub bx, [bp] +sub si, 2 +sub bp, 2 +sub cx, 8 +;sub bx, [bp + 0] +;sub cx, [bx + 2] +;sub bh, [bp + si + 4] +;sub di, [bp + di + 6] +;sub [bx+si], bx +;sub [bp], bx +;sub [bp + 0], bx +;sub [bx + 2], cx +;sub [bp + si + 4], bh +;sub [bp + di + 6], di +sub byte [bx], 34 +sub word [bx + di], 29 +;sub ax, [bp] +;sub al, [bx + si] +;sub ax, bx +;sub al, ah +sub ax, 1000 +sub al, -30 +sub al, 9 + +;cmp bx, [bx+si] +;cmp bx, [bp] +cmp si, 2 +cmp bp, 2 +cmp cx, 8 +;cmp bx, [bp + 0] +;cmp cx, [bx + 2] +;cmp bh, [bp + si + 4] +;cmp di, [bp + di + 6] +;cmp [bx+si], bx +;cmp [bp], bx +;cmp [bp + 0], bx +;cmp [bx + 
2], cx
+;cmp [bp + si + 4], bh
+;cmp [bp + di + 6], di
+cmp byte [bx], 34
+cmp word [4834], 29
+;cmp ax, [bp]
+;cmp al, [bx + si]
+;cmp ax, bx
+;cmp al, ah
+cmp ax, 1000
+cmp al, -30
+cmp al, 9
+
+; test_label0:
+; jnz test_label1
+; jnz test_label0
+; test_label1:
+; jnz test_label0
+; jnz test_label1
+;
+; label:
+; je label
+; jl label
+; jle label
+; jb label
+; jbe label
+; jp label
+; jo label
+; js label
+; jne label
+; jnl label
+; jg label
+; jnb label
+; ja label
+; jnp label
+; jno label
+; jns label
+; loop label
+; loopz label
+; loopnz label
+; jcxz label
diff --git a/1-3/nasm-compile.sh b/1-3/nasm-compile.sh
new file mode 100755
index 0000000..67547f1
--- /dev/null
+++ b/1-3/nasm-compile.sh
@@ -0,0 +1,5 @@
+#! /usr/bin/env bash
+SCRIPT_LOC=$( dirname -- "$( readlink -f -- "$0" )" )
+
+nasm -f bin "$SCRIPT_LOC/listing-41.asm" -o "$SCRIPT_LOC/listing-41"
+# nasm -f bin "$SCRIPT_LOC/listing-42.asm" -o "$SCRIPT_LOC/listing-42"
diff --git a/1-3/output.asm b/1-3/output.asm
new file mode 100644
index 0000000..e69de29