Skip to content

Commit

Permalink
Added instr_get_tab() (#15)
Browse files Browse the repository at this point in the history
* Added `instr_get_tab()`

This commit added the `instr_get_tab()` function. This function
takes in the instruction in `instruction_t` form and outputs the
instruction encoder table required, based on the instruction's
encoder identity from the `op_ident_identify()` function implemented
in C++ in the `operand.cpp` file.

This function encapsulates the responsibilities of the `assemble()`
function and allows further optimisations by caching certain data,
but also allows it to be exposed to the callers of the library.

* Integrated encoder lookup with `instr_get_tab()`

As implemented in commit d382803,
a new function has taken over the looking up of the instruction
encoder reference tables, this commit describes the changes required
to migrate the previous code and leveraging the new `instr_get_tab()`
function.

Now, the tables returned from `instr_get_tab()` can be cached, and
other encoding instances inside the `assemble()` function (since
there are two passes of the assembler, one pre and one normal pass)
may reference the passed-down `tabs` argument, preventing the
duplicate invocation of the search of the instruction encoder
reference tables. (Well, yeah, it's a small optimisation, but it could
potentially save time when indexing large sets of instructions.)

* Moved over `IS_LABEL` and added function def

Since the `IS_LABEL` macro is used in many modules, namely the
`codegen` and `instruction` modules, it has been exposed in this
header file. More consideration will be given to its exact "resting
place" in the future; this is just a place to stash it
for now until a new place is found.

Also, the corresponding `instr_get_tab()` function, although lacking
documentation, has been declared in this header file to allow the
`codegen` module, among many other external modules and library users,
to access the function in 1950a53:

> migrate the previous code and leveraging the new `instr_get_tab()`
> [function].

* Added previously missing documentation

This commit added the previously missing documentation messages
that documented the functionality and parameters of the
`instr_get_tab()` function
  • Loading branch information
cheng-alvin authored Dec 20, 2024
1 parent 44ff998 commit e908290
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 45 deletions.
69 changes: 24 additions & 45 deletions libjas/codegen.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
#include <stdlib.h>
#include <string.h>

#define CURR_TABLE instr_table[instr_arr[i].instr][j]

#define FREE_ALL(...) \
do { \
void *pointers[] = {__VA_ARGS__}; \
Expand All @@ -43,7 +41,20 @@

bool is_pre = false;

static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_size); // TODO Fix the stupid hack
/**
 * Builds a per-instruction cache of encoder table entries so that each
 * assembler pass can index it instead of repeating the table search.
 *
 * Entry `i` of the result corresponds to `instr_arr[i]`: label
 * instructions are stored as `INSTR_TERMINATOR`, every other instruction
 * is resolved through `instr_get_tab()`.
 *
 * @param instr_arr Array of instructions to resolve
 * @param arr_size  Number of elements (not bytes) in `instr_arr`
 * @return A heap-allocated array of `arr_size` entries that the caller
 *         must release with `free()`, or `NULL` if the allocation fails
 *         (an allocation of zero elements may also yield `NULL`).
 */
static instr_encode_table_t *get_instr_tabs(instruction_t *instr_arr, size_t arr_size) {
  // calloc() rejects an overflowing `arr_size * sizeof *tabs` product
  // instead of silently allocating a buffer that is too small.
  instr_encode_table_t *tabs = calloc(arr_size, sizeof *tabs);
  if (tabs == NULL) return NULL;

  for (size_t i = 0; i < arr_size; i++) {
    if (IS_LABEL(instr_arr[i])) {
      tabs[i] = INSTR_TERMINATOR;
      continue;
    }
    tabs[i] = instr_get_tab(instr_arr[i]);
  }
  return tabs;
}

static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_size, instr_encode_table_t *tabs);
buffer_t codegen(enum modes mode, instruction_t *instr_arr, size_t arr_size, enum codegen_modes exec_mode) {
for (size_t i = 0; i < arr_size / sizeof(instruction_t); i++) {
if (instr_arr[i].instr >= INSTR_DIR_LOCAL_LABEL) {
Expand All @@ -57,10 +68,14 @@ buffer_t codegen(enum modes mode, instruction_t *instr_arr, size_t arr_size, enu
}

is_pre = true;
free(assemble(mode, instr_arr, arr_size).data);

// Spaghetti code warning 🍝🍝🍝
const instr_encode_table_t *tabs = get_instr_tabs(instr_arr, arr_size / sizeof(instruction_t));
free(assemble(mode, instr_arr, arr_size, tabs).data);

is_pre = false;
const buffer_t code = assemble(mode, instr_arr, arr_size);
const buffer_t code = assemble(mode, instr_arr, arr_size, tabs);
free(tabs);

if (exec_mode == CODEGEN_RAW) return code;

Expand Down Expand Up @@ -140,11 +155,7 @@ buffer_t codegen(enum modes mode, instruction_t *instr_arr, size_t arr_size, enu
return out;
}

// Macro for checking if the instruction is a label and shall be handled
#define IS_LABEL (uint8_t) instr_arr[i].instr >= (uint8_t)INSTR_DIR_LOCAL_LABEL && \
(uint8_t)instr_arr[i].instr <= (uint8_t)INSTR_DIR_EXTERN_LABEL

static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_size) {
static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_size, instr_encode_table_t *tabs) {
arr_size /= sizeof(instruction_t);
buffer_t buf = BUF_NULL;

Expand All @@ -158,7 +169,7 @@ static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_s
const buffer_t *data = (buffer_t *)instr_arr[i].operands[0].data;
buf_write(&buf, data->data, data->len);
}
if (is_pre && IS_LABEL) {
if (is_pre && IS_LABEL(instr_arr[i])) {
for (size_t j = 0; j < label_get_size; j++) {
label_t *tab = label_get_table();
if (strcmp(tab[j].name, instr_arr[i].operands[0].data) == 0) {
Expand All @@ -171,42 +182,10 @@ static buffer_t assemble(enum modes mode, instruction_t *instr_arr, size_t arr_s
continue;
}

const instr_encode_table_t ref = tabs[i];
instruction_t current = instr_arr[i];

const enum operands operand_list[4] = {
current.operands[0].type,
current.operands[1].type,
current.operands[2].type,
current.operands[3].type,
};

enum enc_ident ident = op_ident_identify(operand_list);
if (instr_arr[i].instr == INSTR_MOV) {
if (ident == OP_MI)
ident = OP_OI;

if (ident == OP_I)
ident = OP_O;
}

instr_encode_table_t ref;
unsigned int j = 0;
while (CURR_TABLE.opcode_size != 0) {
if (CURR_TABLE.ident == ident) {
ref = CURR_TABLE;
break;
}
j++;
}

if (ref.opcode_size == 0) {
err("No corrsponding instruction opcode found.");
free(buf.data);
return BUF_NULL;
}

if (ref.pre != NULL) ref.pre(current.operands, &buf, &ref, (enum modes)mode);
instr_encode_func(ident)(current.operands, &buf, &ref, (enum modes)mode);
instr_encode_func(ref.ident)(current.operands, &buf, &ref, (enum modes)mode);
}

return buf;
Expand Down
18 changes: 18 additions & 0 deletions libjas/include/instruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,4 +150,22 @@ instr_encoder_t instr_encode_func(enum enc_ident input);
#define INSTR_NULL \
(instruction_t) { .instr = NULL, .operands = NULL }

// Macro for checking if the instruction `x` is a label and shall be handled
// as one, i.e. whether its `instr` field lies in the inclusive range
// INSTR_DIR_LOCAL_LABEL .. INSTR_DIR_EXTERN_LABEL (compared as `uint8_t`).
//
// Both the argument and the whole expansion are parenthesised so the macro
// composes correctly with surrounding operators (`!IS_LABEL(v)`,
// `a || IS_LABEL(v)`) and with non-trivial arguments (`IS_LABEL(*ptr)`).
// Note that `x` is evaluated twice; do not pass expressions with side effects.
#define IS_LABEL(x)                                        \
  ((uint8_t)(x).instr >= (uint8_t)INSTR_DIR_LOCAL_LABEL && \
   (uint8_t)(x).instr <= (uint8_t)INSTR_DIR_EXTERN_LABEL)

/**
 * Function for getting the instruction table based on the instruction
 * struct provided. The function will return a instruction table struct
 * as described above in this header file.
 *
 * The entry is selected by matching the encoder identity derived from
 * the instruction's operand types against the instruction's encoder
 * reference table.
 *
 * @param instr The instruction struct to get the identifier from
 * @return The instruction table struct, or `INSTR_TERMINATOR` (an empty
 *         entry) if `instr` is a label or no matching entry exists; in
 *         the latter case an error is also reported.
 *
 * @see `instr_encode_table_t`
 * @see `instruction_t`
 */
instr_encode_table_t instr_get_tab(instruction_t instr);

#endif
31 changes: 31 additions & 0 deletions libjas/instruction.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,37 @@ instr_encode_table_t *instr_table[] =

// clang-format on

#define CURR_TABLE instr_table[instr.instr][j]

/**
 * Resolves the encoder table entry that matches `instr`.
 *
 * The operand types of `instr` are reduced to an encoder identity via
 * `op_ident_identify()`; the instruction's row in `instr_table` is then
 * scanned until an entry with that identity is found. A row ends at the
 * first entry whose `opcode_size` is zero.
 *
 * @param instr The instruction to look up
 * @return The matching entry, or `INSTR_TERMINATOR` (empty) when `instr`
 *         is a label or when no entry matches (an error is reported in
 *         the latter case).
 */
instr_encode_table_t instr_get_tab(instruction_t instr) {
  // Labels are answered with the empty entry straight away.
  if (IS_LABEL(instr)) {
    return INSTR_TERMINATOR;
  }

  const enum operands operand_list[4] = {
      instr.operands[0].type,
      instr.operands[1].type,
      instr.operands[2].type,
      instr.operands[3].type,
  };
  enum enc_ident ident = op_ident_identify(operand_list);

  // For MOV, the OP_MI and OP_I identities are remapped to OP_OI and OP_O.
  if (instr.instr == INSTR_MOV) {
    if (ident == OP_MI) {
      ident = OP_OI;
    }
    if (ident == OP_I) {
      ident = OP_O;
    }
  }

  // Walk the row up to its zero-`opcode_size` terminator.
  for (unsigned int j = 0; CURR_TABLE.opcode_size != 0; j++) {
    if (CURR_TABLE.ident == ident) {
      return CURR_TABLE;
    }
  }

  // Row exhausted: no corresponding instruction opcode found.
  err("No corrsponding instruction opcode found.");
  return INSTR_TERMINATOR;
}

instr_encoder_t instr_encode_func(enum enc_ident input) {
instr_encoder_t lookup[] = {&mr, &rm, &oi, &mi, &i, &m, &zo, &d, &o};
return lookup[(size_t)input];
Expand Down

0 comments on commit e908290

Please sign in to comment.