1-2: Complete MOV_RM_TF_R decoding

This commit is contained in:
Hane 2026-01-25 00:11:43 +01:00
commit eb2c1cb0f9
7 changed files with 293 additions and 0 deletions

13
1-1/output.asm Normal file
View file

@ -0,0 +1,13 @@
bits 16
mov cx, bx
mov ch, ah
mov dx, bx
mov si, bx
mov bx, di
mov al, cl
mov ch, ch
mov bx, ax
mov bx, si
mov sp, di
mov bp, ax

BIN
1-2/8086coded Executable file

Binary file not shown.

3
1-2/compile.sh Executable file
View file

@ -0,0 +1,3 @@
#! /usr/bin/env bash
# Builds the decoder with optimisation disabled (-O0) and DWARF debug info
# (-g -gdwarf), writing the executable to 1-2/8086coded.
# The paths are relative, so run this from the directory that contains 1-2/.
# NOTE(review): decoder.c uses digit-separated binary literals (0b1000'10) and
# `bool` without <stdbool.h>, and no -std flag is passed here, so the build
# relies on the compiler's default dialect accepting them -- confirm gcc version.
gcc 1-2/decoder.c -O0 -g -gdwarf -o 1-2/8086coded

251
1-2/decoder.c Normal file
View file

@ -0,0 +1,251 @@
#include <stdint.h>
#include <stdio.h>
#include <limits.h>
#include <string.h>
/*
 * Fixed leading opcode bits of the 8086 MOV forms. Each constant holds only
 * the constant prefix of the first instruction byte; the trailing comment
 * names the variable bits that follow it in that byte (d = direction,
 * w = wide, reg = register code). To test a byte against a constant, shift
 * the variable bits out first -- main() does this for MOV_RM_TF_R with
 * (byte >> 2). Only MOV_RM_TF_R is decoded so far.
 * The names presumably abbreviate the rows of the 8086 manual's MOV table
 * (RM = register/memory, R = register, I = immediate, M = memory,
 * A = accumulator, SR = segment register, T = to, TF = to/from).
 */
enum DT_INSTRUCTIONS
{
MOV_RM_TF_R = 0b1000'10, // 6-bit prefix, followed by d and w
MOV_I_T_RM = 0b1100'011, // 7-bit prefix, followed by w
MOV_I_T_R = 0b1011, // 4-bit prefix, followed by w and a 3-bit reg
MOV_M_T_A = 0b1010'000, // 7-bit prefix, followed by w
MOV_A_T_M = 0b1010'001, // 7-bit prefix, followed by w
MOV_RM_T_SR = 0b1000'1110, // full byte, no variable bits
MOV_SR_T_RM = 0b1000'1100, // full byte, no variable bits
};
/*
 * Values of the 2-bit MOD field (the top two bits of the second instruction
 * byte, extracted in MOV_RM_TF_R_parse with mask 0b11000000 and >> 6).
 * MOD selects whether the R/M field names a register or a memory operand
 * and how many displacement bytes follow.
 */
enum MOV_REGISTER_MODE
{
    MEM_NO_DISP    = 0b00, /* memory operand, no displacement (R/M == 110 is a direct 16-bit address) */
    MEM_8BIT_DISP  = 0b01, /* memory operand followed by one displacement byte */
    MEM_16BIT_DISP = 0b10, /* memory operand followed by two displacement bytes, low byte first */
    REG_NO_DISP    = 0b11  /* R/M names a register; no displacement */
};
/* Every register mnemonic is two characters plus the terminating NUL. */
#define REG_ENCODING_TXT_LEN 3
/*
 * Register mnemonics indexed by (w * 8 + 3-bit register code): entries 0-7
 * are the byte registers, entries 8-15 the word registers.
 * Declared const because this is a read-only lookup table.
 */
const char REG_ENCODING_TXT[0b10000][REG_ENCODING_TXT_LEN] = {
    "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
    "ax", "cx", "dx", "bx", "sp", "bp", "si", "di"
};
/* The longest base expression ("bx + si") is seven characters plus NUL. */
#define EA_ENCODING_TXT_LEN 8
/*
 * Effective-address base expressions indexed by the 3-bit R/M field.
 * Entry 0b110 ("bp") does not apply when MOD is 00: that combination is a
 * direct address and is handled separately by the decoder.
 */
const char EA_ENCODING_TXT[0b1000][EA_ENCODING_TXT_LEN] = {
    "bx + si", "bx + di", "bp + si", "bp + di",
    "si", "di", "bp", "bx"
};
/*
 * 3-bit codes of the byte-sized registers. The values line up with entries
 * 0-7 of REG_ENCODING_TXT; the comment beside each one shows the bit pattern.
 */
enum REG_ENCODING
{
    R_AL = 0, /* 000 */
    R_CL = 1, /* 001 */
    R_DL = 2, /* 010 */
    R_BL = 3, /* 011 */
    R_AH = 4, /* 100 */
    R_CH = 5, /* 101 */
    R_DH = 6, /* 110 */
    R_BH = 7  /* 111 */
};
/*
 * 3-bit codes of the word-sized registers. Adding 8 to a value gives the
 * matching index in REG_ENCODING_TXT (entries 8-15); the comment beside each
 * one shows the bit pattern.
 */
enum WREG_ENCODING
{
    WR_AX = 0, /* 000 */
    WR_CX = 1, /* 001 */
    WR_DX = 2, /* 010 */
    WR_BX = 3, /* 011 */
    WR_SP = 4, /* 100 */
    WR_BP = 5, /* 101 */
    WR_SI = 6, /* 110 */
    WR_DI = 7  /* 111 */
};
/* The machine-code image being decoded. */
typedef struct binary_data
{
    FILE *binary;  /* input stream; main() opens it from argv[1] in "rb" mode */
    uint64_t size; /* length of the input in bytes, measured by main() with fseek/ftell */
} binary_data;

/* Listing stream; main() opens it as "output.asm" and the decoder appends to it. */
FILE *output = NULL;

/* Process-wide input state shared between main() and the decoder. */
binary_data bin = { NULL, 0 };
int MOV_RM_TF_R_parse(uint8_t byte1, uint8_t byte2, binary_data *binary, int bytes_read)
{
const uint8_t wide_mask = 0b0000'0001;
const uint8_t dest_mask = 0b0000'0010;
const uint8_t mod_mask = 0b1100'0000;
const uint8_t regm_mask = 0b0011'1000;
unsigned char byte[1];
uint8_t low_disp = 0;
uint8_t high_disp = 0;
uint16_t wide_disp = 0;
uint8_t extra_bytes_read = 0;
bool reg_is_dest = (byte1 & dest_mask);
uint8_t inst_mod = (byte2 & mod_mask) >> 6;
//Effective Address string compose vars
char ea_string[18] = {'\0'};
char *ea_string_write_ptr = ea_string;
//Number of chars disp value takes
int disp_len = 0;
uint8_t reg_value = 0;
uint8_t ea_table_value = 0;
switch (inst_mod)
{
case (REG_NO_DISP):
//Since we're doing register mode/register to register, both reg and r/m are affected by the W bit
//if D=1, dest is retrieved from the reg field in byte2 and src from r/m
uint8_t dest_value = (reg_is_dest) ? ((byte2 & regm_mask) >> 3) + ((byte1 & wide_mask) * 8)
: (byte2 & (regm_mask >> 3)) + ((byte1 & wide_mask) * 8);
uint8_t src_value = (reg_is_dest) ? (byte2 & (regm_mask >> 3)) + ((byte1 & wide_mask) * 8)
: ((byte2 & regm_mask) >> 3) + ((byte1 & wide_mask) * 8);
printf("%s %s, %s\n", "mov", REG_ENCODING_TXT[dest_value], REG_ENCODING_TXT[src_value]);
fprintf(output, "%s %s, %s\n", "mov", REG_ENCODING_TXT[dest_value], REG_ENCODING_TXT[src_value]);
break;
case (MEM_8BIT_DISP):
fread(byte, sizeof(byte), 1, bin.binary);
extra_bytes_read++;
low_disp = (uint8_t)byte[0];
reg_value = ((byte2 & regm_mask) >> 3) + ((byte1 & wide_mask) * 8);
ea_table_value = (byte2 & (regm_mask >> 3));
//EA STRING GENERATION
*ea_string_write_ptr = '[';
ea_string_write_ptr++;
memcpy(ea_string_write_ptr, EA_ENCODING_TXT[ea_table_value], strlen(EA_ENCODING_TXT[ea_table_value]));
ea_string_write_ptr += strlen(EA_ENCODING_TXT[ea_table_value]);
memcpy(ea_string_write_ptr, " + ", 3);
ea_string_write_ptr += 3;
disp_len = sprintf(ea_string_write_ptr, "%d", low_disp);
ea_string_write_ptr += disp_len;
*ea_string_write_ptr = ']';
//END
printf("%s %s, %s\n", "mov", (reg_is_dest ? REG_ENCODING_TXT[reg_value]
: ea_string)
, (reg_is_dest ? ea_string
: REG_ENCODING_TXT[reg_value]));
fprintf(output, "%s %s, %s\n", "mov", (reg_is_dest ? REG_ENCODING_TXT[reg_value]
: ea_string)
, (reg_is_dest ? ea_string
: REG_ENCODING_TXT[reg_value]));
break;
case (MEM_16BIT_DISP):
fread(byte, sizeof(byte), 1, bin.binary);
extra_bytes_read++;
low_disp = (uint8_t)byte[0];
fread(byte, sizeof(byte), 1, bin.binary);
extra_bytes_read++;
high_disp = (uint8_t)byte[0];
//Composing wide displacement
wide_disp = high_disp << 8;
wide_disp = wide_disp | low_disp;
reg_value = ((byte2 & regm_mask) >> 3) + ((byte1 & wide_mask) * 8);
ea_table_value = (byte2 & (regm_mask >> 3));
//EA STRING GENERATION
*ea_string_write_ptr = '[';
ea_string_write_ptr++;
memcpy(ea_string_write_ptr, EA_ENCODING_TXT[ea_table_value], strlen(EA_ENCODING_TXT[ea_table_value]));
ea_string_write_ptr += strlen(EA_ENCODING_TXT[ea_table_value]);
memcpy(ea_string_write_ptr, " + ", 3);
ea_string_write_ptr += 3;
disp_len = sprintf(ea_string_write_ptr, "%d", wide_disp);
ea_string_write_ptr += disp_len;
*ea_string_write_ptr = ']';
//END
printf("%s %s, %s\n", "mov", (reg_is_dest ? REG_ENCODING_TXT[reg_value]
: ea_string)
, (reg_is_dest ? ea_string
: REG_ENCODING_TXT[reg_value]));
fprintf(output, "%s %s, %s\n", "mov", (reg_is_dest ? REG_ENCODING_TXT[reg_value]
: ea_string)
, (reg_is_dest ? ea_string
: REG_ENCODING_TXT[reg_value]));
break;
case (MEM_NO_DISP):
//Checking if special case 110 applies
ea_table_value = (byte2 & (regm_mask >> 3));
//If R/M equals 110, there actually is displacement to worry about
bool is_direct_address = (ea_table_value == 0b110);
if (is_direct_address)
{
fread(byte, sizeof(byte), 1, bin.binary);
extra_bytes_read++;
low_disp = (uint8_t)byte[0];
fread(byte, sizeof(byte), 1, bin.binary);
extra_bytes_read++;
high_disp = (uint8_t)byte[0];
//Composing wide displacement
wide_disp = high_disp << 8;
wide_disp = wide_disp | low_disp;
}
reg_value = ((byte2 & regm_mask) >> 3) + ((byte1 & wide_mask) * 8);
if (is_direct_address)
{
//EA STRING GENERATION
*ea_string_write_ptr = '[';
ea_string_write_ptr++;
disp_len = sprintf(ea_string_write_ptr, "%d", wide_disp);
ea_string_write_ptr += disp_len;
*ea_string_write_ptr = ']';
//END
}
else
{
//EA STRING GENERATION
*ea_string_write_ptr = '[';
ea_string_write_ptr++;
memcpy(ea_string_write_ptr, EA_ENCODING_TXT[ea_table_value], strlen(EA_ENCODING_TXT[ea_table_value]));
ea_string_write_ptr += strlen(EA_ENCODING_TXT[ea_table_value]);
*ea_string_write_ptr = ']';
//END
}
printf("%s %s, %s\n", "mov", (reg_is_dest ? REG_ENCODING_TXT[reg_value]
: ea_string)
, (reg_is_dest ? ea_string
: REG_ENCODING_TXT[reg_value]));
fprintf(output, "%s %s, %s\n", "mov", (reg_is_dest ? REG_ENCODING_TXT[reg_value]
: ea_string)
, (reg_is_dest ? ea_string
: REG_ENCODING_TXT[reg_value]));
break;
}
return extra_bytes_read;
}
/*
 * Entry point: disassembles the 8086 binary named by argv[1], writing the
 * listing to "output.asm" (the decoder also echoes it to stdout).
 *
 * Decoding stops at the first opcode that is not a "MOV register/memory
 * to/from register", because that is the only form handled so far.
 *
 * Exit status: 0 on success, -1 wrong argument count, -2 CHAR_BIT is not 8,
 * -3 input cannot be opened or measured, -4 output cannot be opened,
 * -5 output could not be flushed/closed.
 */
int main(int argc, char** argv)
{
    if (argc != 2)
    {
        fprintf(stderr, "usage: %s <8086-binary>\n",
                (argc > 0 && argv[0] != NULL) ? argv[0] : "8086coded");
        return -1;
    }
    if (CHAR_BIT != 8) return -2;

    bin.binary = fopen(argv[1], "rb");
    if (bin.binary == NULL)
    {
        perror(argv[1]);
        return -3;
    }

    /* Measure the input by seeking to its end, then rewind for decoding. */
    long length = -1;
    if (fseek(bin.binary, 0, SEEK_END) == 0)
        length = ftell(bin.binary);
    if (length < 0 || fseek(bin.binary, 0, SEEK_SET) != 0)
    {
        perror(argv[1]);
        fclose(bin.binary);
        bin.binary = NULL;
        return -3;
    }
    bin.size = (uint64_t)length;

    output = fopen("output.asm", "w");
    if (output == NULL)
    {
        perror("output.asm");
        fclose(bin.binary);
        bin.binary = NULL;
        return -4;
    }
    fprintf(output, "%s\n\n", "bits 16");

    uint64_t bytes_read = 0;
    while (bytes_read < bin.size)
    {
        unsigned char opcode = 0;
        unsigned char modrm = 0;

        if (fread(&opcode, sizeof opcode, 1, bin.binary) != 1) break;
        bytes_read++;

        //For now we're just checking for RM_TF_R: drop the d and w bits, compare the prefix
        if ((opcode >> 2) != MOV_RM_TF_R) break;

        /* A lone opcode byte at the end of the file has nothing to decode. */
        if (fread(&modrm, sizeof modrm, 1, bin.binary) != 1)
        {
            fprintf(stderr, "%s: input ended inside an instruction\n", argv[1]);
            break;
        }
        bytes_read++;

        bytes_read += (uint64_t)MOV_RM_TF_R_parse(opcode, modrm, &bin, (int)bytes_read);
    }

    int status = 0;
    /* fclose flushes the listing; a failure here means output.asm is incomplete. */
    if (fclose(output) != 0)
    {
        perror("output.asm");
        status = -5;
    }
    output = NULL;
    fclose(bin.binary);
    bin.binary = NULL;
    return status;
}

BIN
1-2/listing-40 Normal file

Binary file not shown.

22
1-2/listing-40.asm Normal file
View file

@ -0,0 +1,22 @@
bits 16
; Signed displacements
mov ax, [bx + di - 37]
mov [si - 300], cx
mov dx, [bx - 32]
; Explicit sizes
mov [bp + di], byte 7
mov [di + 901], word 347
; Direct address
mov bp, [5]
mov bx, [3458]
; Memory-to-accumulator test
mov ax, [2555]
mov ax, [16]
; Accumulator-to-memory test
mov [2554], ax
mov [15], ax

4
1-2/nasm-compile.sh Executable file
View file

@ -0,0 +1,4 @@
#! /usr/bin/env bash
# Assembles the reference listings into flat binaries (-f bin) that the
# decoder can be run against. Paths are relative, so run this from the
# directory that contains 1-2/.
# NOTE(review): listing-39.asm is not among the files shown in this commit;
# presumably it already exists in 1-2/ -- confirm before relying on this script.
nasm -f bin 1-2/listing-39.asm -o 1-2/listing-39
nasm -f bin 1-2/listing-40.asm -o 1-2/listing-40