Skip to content

Commit

Permalink
Added support for varying displacement sizes (#7)
Browse files Browse the repository at this point in the history
* Added values for different sized displacements

Checks have been added that make use of the `OP_MODRM_DISP32`
and `OP_MODRM_DISP8` values (which correspond to the ModR/M values
from `operand.h`). These allow the displacement bytes that follow
an instruction to be truncated to 8 or 32 bits, leaving the extra
space available only for offsets that need it.

For example a value of 123 would be originally 4 bytes wide padded
with zeros at the end of the instruction, now the new ModR/M mode
will allow the value of 123 to be packed neatly into a single byte
at the end of the instruction

* Updated test file: operand.

Removed duplicate test cases and added new test cases for
commit number f9094eb,
mostly targeted at displacements.

Note: test file currently failing

* Correction: Corrected test case

Changed expected value to `OP_MODRM_DISP32` since `0xFFFF` is grea-
ter than the value to be checked against - `0xFF` (which is defined
as the largest value of a `uint8_t`)

* Added type cast to smaller type

* Added `write_offset()`

This function not only encapsulates the process of writing
prefixes to the corresponding ModR/M byte but also checks
the mode generated from the change specified in commit number
4df7a23 and checks for the
size of the offset before writing the offset with the size
into the buffer array

* Fixed issue with signed offsets

According to the Intel manuals, all offsets regardless of size are
signed integers, therefore, we cannot use the unsigned max
value of 255 as specified in `UINT8_MAX` (defined in <stdint.h>)
but must instead use the corresponding maximum value for the
signed counterpart to `uint8_t`, which is `INT8_MAX`

Now the `op_modrm_mode()` will accept the offset values in the form
of `int8_t`s as the `uint8_t`s will not properly support signed of-
fsets that Intel expects to be written to the end of the instruction
encoded form.

* Added comments to reflect changes in e0ad9bf
  • Loading branch information
cheng-alvin authored Dec 15, 2024
1 parent aa48924 commit c78bace
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 13 deletions.
30 changes: 22 additions & 8 deletions libjas/encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,18 @@ static void ref_label(operand_t *op_arr, buffer_t *buf, uint8_t index) {
buf_write(buf, (uint8_t *)&rel_offset, rel_sz);
}

/**
 * Appends the displacement of operand `op_arr[index]` to `buf`, using the
 * width selected by the ModR/M `mode`.
 *
 * @param mode ModR/M mode value, compared against `OP_MODRM_DISP8` and
 * `OP_MODRM_DISP32`.
 * @param buf Buffer the displacement bytes are appended to.
 * @param op_arr Operand array holding the operand whose offset is written.
 * @param index Position in `op_arr` of the operand to read the offset from.
 *
 * @note Any `mode` other than the two displacement modes writes nothing.
 */
static void write_offset(uint8_t mode, buffer_t *buf, operand_t *op_arr, uint8_t index) {
  operand_t *target = &op_arr[index];

  // 8-bit displacement: the offset is narrowed to a single byte.
  if (mode == OP_MODRM_DISP8) {
    buf_write_byte(buf, (uint8_t)target->offset);
    return;
  }

  // 32-bit displacement: four bytes are copied starting at the offset field.
  if (mode == OP_MODRM_DISP32) {
    buf_write(buf, (uint8_t *)&target->offset, 4);
  }
}

DEFINE_ENCODER(i) {
// Error checking - A register only & only 8, 16, 32 bit-sized operands
if (reg_lookup_val(op_arr[0].data) != 0 && !reg_needs_rex((enum registers)op_arr[0].data)) {
Expand All @@ -90,9 +102,10 @@ DEFINE_ENCODER(m) {

op_write_prefix(buf, op_arr, mode);
check_mode(mode, instr_ref->support);

buf_write(buf, OP_OPCODE_HELPER, instr_ref->opcode_size);
buf_write_byte(buf, op_modrm_mode(op_arr[0]) | opcode_extend | rm);

const uint8_t mod = op_modrm_mode(op_arr[0]);
buf_write_byte(buf, mod | opcode_extend | rm);

if (op_m(op_arr[0].type) && rm == 4)
buf_write_byte(buf, EMPTY_SIB);
Expand All @@ -104,8 +117,7 @@ DEFINE_ENCODER(m) {
if (rm == 5 && op_arr[0].offset == 0)
buf_write_byte(buf, 0);

if (op_arr[0].offset != 0)
buf_write(buf, (uint8_t *)&op_arr[0].offset, 4);
write_offset(mod, buf, op_arr, 0);
}

static void i_common(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *instr_ref, enum modes mode) {
Expand Down Expand Up @@ -169,7 +181,8 @@ static void mr_rm_ref(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *in
check_mode(mode, instr_ref->support);
buf_write(buf, OP_OPCODE_HELPER, instr_ref->opcode_size);

buf_write_byte(buf, op_modrm_mode(op_arr[rm_idx]) | (reg << 3) | rm);
const uint8_t mod = op_modrm_mode(op_arr[rm_idx]);
buf_write_byte(buf, mod | (reg << 3) | rm);

/**
* @note
Expand Down Expand Up @@ -202,9 +215,10 @@ static void mr_rm_ref(operand_t *op_arr, buffer_t *buf, instr_encode_table_t *in
if (rm == 5 && op_arr[rm_idx].offset == 0)
buf_write_byte(buf, 0);

if (op_arr[rm_idx].offset != 0) {
buf_write(buf, (uint8_t *)&op_arr[rm_idx].offset, 4);
}
// if (op_arr[rm_idx].offset != 0) {
// buf_write(buf, (uint8_t *)&op_arr[rm_idx].offset, 4);
// }
write_offset(mod, buf, op_arr, rm_idx);
}

/**
 * Encoder for the `mr` identity: delegates all work to `mr_rm_ref()`,
 * forwarding the encoder arguments unchanged and passing `false` as the
 * final argument.
 *
 * @note NOTE(review): the final flag presumably selects between the `mr`
 * and `rm` operand orderings - confirm against `mr_rm_ref()`.
 */
DEFINE_ENCODER(mr) {
  mr_rm_ref(op_arr, buf, instr_ref, mode, false);
}
Expand Down
4 changes: 4 additions & 0 deletions libjas/include/operand.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ enum enc_ident op_ident_identify(enum operands *input);
* @note Function also performs checks for RIP, ESP, IP instr-
* uction pointers for offset and ModR/M bytes and modes.
*
* @note The offset value must be held in a signed type, since
* Intel specifies displacements as signed integers regardless of
* their size.
*
* @see `operand_t`
*/
uint8_t op_modrm_mode(operand_t input);
Expand Down
9 changes: 7 additions & 2 deletions libjas/operand.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,13 @@ uint8_t op_modrm_mode(operand_t input) {
if (op_m(input.type) && input.offset == 0)
return OP_MODRM_INDIRECT;

else if (input.offset != 0)
return OP_MODRM_DISP32;
if (input.offset != 0) {
if ((intmax_t)input.offset > INT32_MAX) err("Displacement value is too large.");
if ((intmax_t)input.offset > INT8_MAX) // Size of a `uint8_t`
return OP_MODRM_DISP32;

return OP_MODRM_DISP8; // Revert to 8-bit displacement when extra space is not needed
}

return OP_MODRM_REG;
}
Expand Down
5 changes: 2 additions & 3 deletions tests/operand.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,10 @@ Test(operand, modrm_mode) {
int expected_mode;
} test_cases[] = {
{op_construct_operand(OP_M64, 0, &(enum registers){REG_RIP}), OP_MODRM_INDIRECT},
{op_construct_operand(OP_M64, 0, &(enum registers){REG_EIP}), OP_MODRM_INDIRECT},
{op_construct_operand(OP_M64, 0, &(enum registers){REG_IP}), OP_MODRM_INDIRECT},
{op_construct_operand(OP_M64, 0, &(enum registers){REG_RAX}), OP_MODRM_INDIRECT},
{op_construct_operand(OP_M64, 8, &(enum registers){REG_RAX}), OP_MODRM_DISP32},
{op_construct_operand(OP_M64, 8, &(enum registers){REG_RAX}), OP_MODRM_DISP8},
{op_construct_operand(OP_M64, 0, &(enum registers){REG_RBP}), OP_MODRM_DISP8},
{op_construct_operand(OP_M64, 0xFFFF, &(enum registers){REG_RAX}), OP_MODRM_DISP32},
};

for (size_t i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
Expand Down

0 comments on commit c78bace

Please sign in to comment.