Reduce memory usage for instruction block #232

Merged
merged 1 commit on Oct 2, 2023
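The change below replaces the fixed-capacity `rv_insn_t` array that each translated block used to reserve up front with a singly linked list of IR nodes allocated on demand from memory pools. A minimal sketch of the before/after layout, assuming the `block_t` fields implied by the diff (the real `block_t` definition is not part of this change set); `block_before_t` and `block_after_t` are illustrative names only:

```c
#include <stdint.h>

typedef struct rv_insn rv_insn_t; /* see src/decode.h */

/* Before: a fixed-capacity IR array was malloc'ed per block. */
typedef struct {
    uint32_t n_insn, insn_capacity; /* capacity was 1 << 10 */
    rv_insn_t *ir;                  /* array of insn_capacity entries */
} block_before_t;

/* After: IR nodes come from a memory pool and are chained via ir->next. */
typedef struct {
    uint32_t n_insn;
    rv_insn_t *ir_head, *ir_tail;   /* singly linked list of decoded insns */
} block_after_t;
```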
src/decode.h: 2 additions & 0 deletions
@@ -298,6 +298,8 @@ typedef struct rv_insn {
* specific IR array without the need for additional copying.
*/
struct rv_insn *branch_taken, *branch_untaken;

struct rv_insn *next;
} rv_insn_t;

/* decode the RISC-V instruction */
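The new `next` field turns a block's decoded instructions into a singly linked list instead of relying on `ir + 1` pointer arithmetic. A minimal traversal sketch, assuming the `ir_head` and `n_insn` fields used by the emulate.c changes below; `dump_block` is a hypothetical helper, not part of this patch:

```c
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper: walk a block's IR chain through the new next
 * pointer, mirroring the "i++, ir = ir->next" loop idiom used below.
 */
static void dump_block(const block_t *block)
{
    const rv_insn_t *ir = block->ir_head;
    for (uint32_t i = 0; i < block->n_insn; i++, ir = ir->next)
        printf("%u: opcode=%u len=%u\n", (unsigned) i, (unsigned) ir->opcode,
               (unsigned) ir->insn_len);
}
```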
src/emulate.c: 79 additions & 49 deletions
@@ -5,6 +5,7 @@

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -30,6 +31,7 @@ extern struct target_ops gdbstub_ops;
#endif

#include "decode.h"
#include "mpool.h"
#include "riscv.h"
#include "riscv_private.h"
#include "state.h"
@@ -277,16 +279,17 @@ static inline uint32_t hash(size_t k)
return k;
}

static void block_translate(riscv_t *rv, block_map_t *map, block_t *block);
/* allocate a basic block */
static block_t *block_alloc(const uint8_t bits)
static block_t *block_alloc(riscv_t *rv, block_map_t *map)
{
block_t *block = malloc(sizeof(struct block));
block_t *block = mpool_alloc(map->block_mp);
assert(block);
block->insn_capacity = 1 << bits;
block->n_insn = 0;
block->predict = NULL;
block->ir = malloc(block->insn_capacity * sizeof(rv_insn_t));
assert(block->ir);

/* Initialize remaining part of block_t */
block_translate(rv, map, block);
return block;
}

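`block_alloc()` now draws the block itself from `map->block_mp`, and `block_translate()` further down draws each IR node from `map->block_ir_mp`; only `mpool_alloc()` appears in this diff. Below is a sketch of how the two pools might be created when the block map is initialized; the `mpool_create()` name, its signature, and the pool sizes are assumptions, not taken from this change:

```c
/* Sketch only: pool setup for the block map (assumed API and sizes). */
static void block_map_pools_init(block_map_t *map)
{
    /* one chunk per block_t and one per rv_insn_t; sizes are illustrative */
    map->block_mp = mpool_create(sizeof(block_t) * 1024, sizeof(block_t));
    map->block_ir_mp =
        mpool_create(sizeof(rv_insn_t) * 1024 * 64, sizeof(rv_insn_t));
}
```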
@@ -366,7 +369,7 @@ static uint32_t last_pc = 0;
rv->PC += ir->insn_len; \
if (unlikely(RVOP_NO_NEXT(ir))) \
return true; \
const rv_insn_t *next = ir + 1; \
const rv_insn_t *next = ir->next; \
MUST_TAIL return next->impl(rv, next); \
}

@@ -395,36 +398,47 @@ enum {
#undef _
};

/* FIXME: This will simply find the n-th instruction by iterating
* the linked list linearly, we may want to find better approach. */
FORCE_INLINE rv_insn_t *next_nth_insn(rv_insn_t *ir, int32_t n)
{
rv_insn_t *tmp = ir;
for (int32_t iter = 0; iter < n; iter++)
tmp = tmp->next;
return tmp;
}

/* multiple lui */
static bool do_fuse1(riscv_t *rv, const rv_insn_t *ir)
static bool do_fuse1(riscv_t *rv, rv_insn_t *ir)
{
rv->csr_cycle += ir->imm2;
for (int i = 0; i < ir->imm2; i++) {
const rv_insn_t *cur_ir = ir + i;
int i;
rv_insn_t *cur_ir;
for (i = 0, cur_ir = ir; i < ir->imm2; i++, cur_ir = cur_ir->next) {
rv->X[cur_ir->rd] = cur_ir->imm;
}
rv->PC += ir->imm2 * ir->insn_len;
if (unlikely(RVOP_NO_NEXT(ir)))
return true;
const rv_insn_t *next = ir + ir->imm2;
const rv_insn_t *next = next_nth_insn(ir, ir->imm2);
MUST_TAIL return next->impl(rv, next);
}

/* LUI + ADD */
static bool do_fuse2(riscv_t *rv, const rv_insn_t *ir)
static bool do_fuse2(riscv_t *rv, rv_insn_t *ir)
{
rv->csr_cycle += 2;
rv->X[ir->rd] = ir->imm;
rv->X[ir->rs2] = rv->X[ir->rd] + rv->X[ir->rs1];
rv->PC += 2 * ir->insn_len;
if (unlikely(RVOP_NO_NEXT(ir)))
return true;
const rv_insn_t *next = ir + 2;
const rv_insn_t *next = next_nth_insn(ir, 2);
MUST_TAIL return next->impl(rv, next);
}

/* multiple SW */
static bool do_fuse3(riscv_t *rv, const rv_insn_t *ir)
static bool do_fuse3(riscv_t *rv, rv_insn_t *ir)
{
rv->csr_cycle += ir->imm2;
opcode_fuse_t *fuse = ir->fuse;
@@ -442,12 +456,12 @@ static bool do_fuse3(riscv_t *rv, const rv_insn_t *ir)
rv->PC += ir->imm2 * ir->insn_len;
if (unlikely(RVOP_NO_NEXT(ir)))
return true;
const rv_insn_t *next = ir + ir->imm2;
const rv_insn_t *next = next_nth_insn(ir, ir->imm2);
MUST_TAIL return next->impl(rv, next);
}

/* multiple LW */
static bool do_fuse4(riscv_t *rv, const rv_insn_t *ir)
static bool do_fuse4(riscv_t *rv, rv_insn_t *ir)
{
rv->csr_cycle += ir->imm2;
opcode_fuse_t *fuse = ir->fuse;
@@ -465,7 +479,7 @@ static bool do_fuse4(riscv_t *rv, const rv_insn_t *ir)
rv->PC += ir->imm2 * ir->insn_len;
if (unlikely(RVOP_NO_NEXT(ir)))
return true;
const rv_insn_t *next = ir + ir->imm2;
const rv_insn_t *next = next_nth_insn(ir, ir->imm2);
MUST_TAIL return next->impl(rv, next);
}

@@ -479,7 +493,7 @@ static bool do_fuse5(riscv_t *rv, const rv_insn_t *ir)
rv->PC = rv->X[rv_reg_ra] & ~1U;
if (unlikely(RVOP_NO_NEXT(ir)))
return true;
const rv_insn_t *next = ir + 1;
const rv_insn_t *next = ir->next;
MUST_TAIL return next->impl(rv, next);
}

@@ -493,7 +507,7 @@ static bool do_fuse6(riscv_t *rv, const rv_insn_t *ir)
rv->PC = rv->X[rv_reg_ra] & ~1U;
if (unlikely(RVOP_NO_NEXT(ir)))
return true;
const rv_insn_t *next = ir + 1;
const rv_insn_t *next = ir->next;
MUST_TAIL return next->impl(rv, next);
}

@@ -541,15 +555,21 @@ FORCE_INLINE bool insn_is_unconditional_branch(uint8_t opcode)
return false;
}

static void block_translate(riscv_t *rv, block_t *block)
static void block_translate(riscv_t *rv, block_map_t *map, block_t *block)
{
block->pc_start = block->pc_end = rv->PC;

rv_insn_t *prev_ir = NULL;
rv_insn_t *ir = mpool_alloc(map->block_ir_mp);
block->ir_head = ir;

/* translate the basic block */
while (block->n_insn < block->insn_capacity) {
rv_insn_t *ir = block->ir + block->n_insn;
while (true) {
memset(ir, 0, sizeof(rv_insn_t));

if (prev_ir)
prev_ir->next = ir;

/* fetch the next instruction */
const uint32_t insn = rv->io.mem_ifetch(block->pc_end);

@@ -564,21 +584,29 @@ static void block_translate(riscv_t *rv, block_t *block)
/* compute the end of pc */
block->pc_end += ir->insn_len;
block->n_insn++;
prev_ir = ir;
/* stop on branch */
if (insn_is_branch(ir->opcode))
break;

ir = mpool_alloc(map->block_ir_mp);
}
block->ir[block->n_insn - 1].tailcall = true;

assert(prev_ir);
block->ir_tail = prev_ir;
block->ir_tail->tailcall = true;
}

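With IR nodes pool-allocated per instruction, tearing a block down would walk the chain and return each node to its pool rather than freeing one array. That path is not part of this diff; the sketch below assumes an `mpool_free()` counterpart to `mpool_alloc()`:

```c
/* Sketch only: release a translated block (mpool_free() is assumed). */
static void block_free(block_map_t *map, block_t *block)
{
    for (rv_insn_t *ir = block->ir_head; ir;) {
        rv_insn_t *next = ir->next;
        mpool_free(map->block_ir_mp, ir);
        ir = next;
    }
    mpool_free(map->block_mp, block);
}
```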
#define COMBINE_MEM_OPS(RW) \
count = 1; \
next_ir = ir + 1; \
next_ir = ir->next; \
tmp_ir = next_ir; \
if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw)) \
break; \
sign = (ir->imm - next_ir->imm) >> 31 ? -1 : 1; \
for (uint32_t j = 1; j < block->n_insn - 1 - i; j++) { \
next_ir = ir + j; \
next_ir = tmp_ir; \
for (uint32_t j = 1; j < block->n_insn - 1 - i; \
j++, next_ir = next_ir->next) { \
if (next_ir->opcode != IIF(RW)(rv_insn_lw, rv_insn_sw) || \
ir->rs1 != next_ir->rs1 || ir->imm - next_ir->imm != 4 * sign) \
break; \
@@ -590,8 +618,8 @@ static void block_translate(riscv_t *rv, block_t *block)
ir->imm2 = count; \
memcpy(ir->fuse, ir, sizeof(opcode_fuse_t)); \
ir->impl = dispatch_table[ir->opcode]; \
for (int j = 1; j < count; j++) { \
next_ir = ir + j; \
next_ir = tmp_ir; \
for (int j = 1; j < count; j++, next_ir = next_ir->next) { \
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \
} \
ir->tailcall = next_ir->tailcall; \
@@ -825,7 +853,7 @@ static bool detect_memcpy(riscv_t *rv, int lib)

static bool libc_substitute(riscv_t *rv, block_t *block)
{
rv_insn_t *ir = block->ir, *next_ir = NULL;
rv_insn_t *ir = block->ir_head, *next_ir = NULL;
switch (ir->opcode) {
case rv_insn_addi:
/* Compare the target block with the first basic block of
@@ -835,10 +863,10 @@
* instruction sequence.
*/
if (ir->imm == 15 && ir->rd == rv_reg_t1 && ir->rs1 == rv_reg_zero) {
next_ir = ir + 1;
next_ir = ir->next;
if (next_ir->opcode == rv_insn_addi && next_ir->rd == rv_reg_a4 &&
next_ir->rs1 == rv_reg_a0 && next_ir->rs2 == rv_reg_zero) {
next_ir = next_ir + 1;
next_ir = next_ir->next;
if (next_ir->opcode == rv_insn_bgeu && next_ir->imm == 60 &&
next_ir->rs1 == rv_reg_t1 && next_ir->rs2 == rv_reg_a2) {
if (detect_memset(rv, 1)) {
@@ -851,7 +879,7 @@
}
} else if (ir->imm == 0 && ir->rd == rv_reg_t1 &&
ir->rs1 == rv_reg_a0) {
next_ir = ir + 1;
next_ir = ir->next;
if (next_ir->opcode == rv_insn_beq && next_ir->rs1 == rv_reg_a2 &&
next_ir->rs2 == rv_reg_zero) {
if (next_ir->imm == 20 && detect_memset(rv, 2)) {
@@ -876,14 +904,14 @@
*/
if (ir->rd == rv_reg_a5 && ir->rs1 == rv_reg_a0 &&
ir->rs2 == rv_reg_a1) {
next_ir = ir + 1;
next_ir = ir->next;
if (next_ir->opcode == rv_insn_andi && next_ir->imm == 3 &&
next_ir->rd == rv_reg_a5 && next_ir->rs1 == rv_reg_a5) {
next_ir = next_ir + 1;
next_ir = next_ir->next;
if (next_ir->opcode == rv_insn_add &&
next_ir->rd == rv_reg_a7 && next_ir->rs1 == rv_reg_a0 &&
next_ir->rs2 == rv_reg_a2) {
next_ir = next_ir + 1;
next_ir = next_ir->next;
if (next_ir->opcode == rv_insn_bne && next_ir->imm == 104 &&
next_ir->rs1 == rv_reg_a5 &&
next_ir->rs2 == rv_reg_zero) {
@@ -912,12 +940,15 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
*/
static void match_pattern(block_t *block)
{
for (uint32_t i = 0; i < block->n_insn - 1; i++) {
rv_insn_t *ir = block->ir + i, *next_ir = NULL;
uint32_t i;
rv_insn_t *ir;
for (i = 0, ir = block->ir_head; i < block->n_insn - 1;
i++, ir = ir->next) {
rv_insn_t *next_ir = NULL, *tmp_ir = NULL;
int32_t count = 0, sign = 1;
switch (ir->opcode) {
case rv_insn_lui:
next_ir = ir + 1;
next_ir = ir->next;
switch (next_ir->opcode) {
case rv_insn_add:
if (ir->rd == next_ir->rs2 || ir->rd == next_ir->rs1) {
@@ -940,7 +971,7 @@ static void match_pattern(block_t *block)
count++;
if (next_ir->tailcall)
break;
next_ir++;
next_ir = next_ir->next;
}
ir->imm2 = count;
ir->opcode = rv_insn_fuse1;
@@ -994,8 +1025,10 @@ static void optimize_constant(riscv_t *rv, block_t *block)
constopt_info_t constopt_info = {0};
constopt_info.is_constant[0] = true;
assert(rv->X[0] == 0);
for (uint32_t i = 0; i < block->n_insn; i++) {
rv_insn_t *ir = block->ir + i;

uint32_t i;
rv_insn_t *ir;
for (i = 0, ir = block->ir_head; i < block->n_insn; i++, ir = ir->next) {
((constopt_func_t) constopt_table[ir->opcode])(ir, &constopt_info);
}
}
@@ -1014,10 +1047,7 @@ static block_t *block_find_or_translate(riscv_t *rv)
}

/* allocate a new block */
next = block_alloc(10);

/* translate the basic block */
block_translate(rv, next);
next = block_alloc(rv, map);

if (!libc_substitute(rv, next)) {
optimize_constant(rv, next);
@@ -1075,27 +1105,27 @@ void rv_step(riscv_t *rv, int32_t cycles)
if (prev->pc_start != last_pc)
prev = block_find(&rv->block_map, last_pc);

rv_insn_t *last_ir = prev->ir + prev->n_insn - 1;
rv_insn_t *last_ir = prev->ir_tail;
/* chain block */
if (!insn_is_unconditional_branch(last_ir->opcode)) {
if (branch_taken && !last_ir->branch_taken)
last_ir->branch_taken = block->ir;
last_ir->branch_taken = block->ir_head;
else if (!last_ir->branch_untaken)
last_ir->branch_untaken = block->ir;
last_ir->branch_untaken = block->ir_head;
} else if (last_ir->opcode == rv_insn_jal
#if RV32_HAS(EXT_C)
|| last_ir->opcode == rv_insn_cj ||
last_ir->opcode == rv_insn_cjal
#endif
) {
if (!last_ir->branch_taken)
last_ir->branch_taken = block->ir;
last_ir->branch_taken = block->ir_head;
}
}
last_pc = rv->PC;

/* execute the block */
const rv_insn_t *ir = block->ir;
const rv_insn_t *ir = block->ir_head;
if (unlikely(!ir->impl(rv, ir)))
break;
