Skip to content

Commit 8b59e9d

Browse files
authored
Merge pull request #234 from qwe661234/limit_memory_usage
Remove useless IR when fusing instructions
2 parents 79ce192 + ac05003 commit 8b59e9d

File tree

1 file changed

+44
-30
lines changed

1 file changed

+44
-30
lines changed

src/emulate.c

Lines changed: 44 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -398,29 +398,18 @@ enum {
398398
#undef _
399399
};
400400

401-
/* FIXME: This will simply find the n-th instruction by iterating
402-
* the linked list linearly, we may want to find better approach. */
403-
FORCE_INLINE rv_insn_t *next_nth_insn(rv_insn_t *ir, int32_t n)
404-
{
405-
rv_insn_t *tmp = ir;
406-
for (int32_t iter = 0; iter < n; iter++)
407-
tmp = tmp->next;
408-
return tmp;
409-
}
410-
411401
/* multiple lui */
412402
static bool do_fuse1(riscv_t *rv, rv_insn_t *ir)
413403
{
414404
rv->csr_cycle += ir->imm2;
415-
int i;
416-
rv_insn_t *cur_ir;
417-
for (i = 0, cur_ir = ir; i < ir->imm2; i++, cur_ir = cur_ir->next) {
418-
rv->X[cur_ir->rd] = cur_ir->imm;
405+
opcode_fuse_t *fuse = ir->fuse;
406+
for (int i = 0; i < ir->imm2; i++) {
407+
rv->X[fuse[i].rd] = fuse[i].imm;
419408
}
420409
rv->PC += ir->imm2 * ir->insn_len;
421410
if (unlikely(RVOP_NO_NEXT(ir)))
422411
return true;
423-
const rv_insn_t *next = next_nth_insn(ir, ir->imm2);
412+
const rv_insn_t *next = ir->next;
424413
MUST_TAIL return next->impl(rv, next);
425414
}
426415

@@ -433,7 +422,7 @@ static bool do_fuse2(riscv_t *rv, rv_insn_t *ir)
433422
rv->PC += 2 * ir->insn_len;
434423
if (unlikely(RVOP_NO_NEXT(ir)))
435424
return true;
436-
const rv_insn_t *next = next_nth_insn(ir, 2);
425+
const rv_insn_t *next = ir->next;
437426
MUST_TAIL return next->impl(rv, next);
438427
}
439428

@@ -456,7 +445,7 @@ static bool do_fuse3(riscv_t *rv, rv_insn_t *ir)
456445
rv->PC += ir->imm2 * ir->insn_len;
457446
if (unlikely(RVOP_NO_NEXT(ir)))
458447
return true;
459-
const rv_insn_t *next = next_nth_insn(ir, ir->imm2);
448+
const rv_insn_t *next = ir->next;
460449
MUST_TAIL return next->impl(rv, next);
461450
}
462451

@@ -479,7 +468,7 @@ static bool do_fuse4(riscv_t *rv, rv_insn_t *ir)
479468
rv->PC += ir->imm2 * ir->insn_len;
480469
if (unlikely(RVOP_NO_NEXT(ir)))
481470
return true;
482-
const rv_insn_t *next = next_nth_insn(ir, ir->imm2);
471+
const rv_insn_t *next = ir->next;
483472
MUST_TAIL return next->impl(rv, next);
484473
}
485474

@@ -622,7 +611,7 @@ static void block_translate(riscv_t *rv, block_map_t *map, block_t *block)
622611
for (int j = 1; j < count; j++, next_ir = next_ir->next) { \
623612
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \
624613
} \
625-
ir->tailcall = next_ir->tailcall; \
614+
remove_next_nth_ir(rv, ir, block, count - 1); \
626615
}
627616

628617
static bool detect_memset(riscv_t *rv, int lib)
@@ -851,6 +840,23 @@ static bool detect_memcpy(riscv_t *rv, int lib)
851840
return true;
852841
}
853842

843+
FORCE_INLINE void remove_next_nth_ir(riscv_t *rv,
844+
rv_insn_t *ir,
845+
block_t *block,
846+
uint8_t n)
847+
{
848+
for (uint8_t i = 0; i < n; i++) {
849+
rv_insn_t *next = ir->next;
850+
ir->next = ir->next->next;
851+
mpool_free(rv->block_map.block_ir_mp, next);
852+
}
853+
if (!ir->next) {
854+
block->ir_tail = ir;
855+
ir->tailcall = true;
856+
}
857+
block->n_insn -= n;
858+
}
859+
854860
static bool libc_substitute(riscv_t *rv, block_t *block)
855861
{
856862
rv_insn_t *ir = block->ir_head, *next_ir = NULL;
@@ -872,7 +878,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
872878
if (detect_memset(rv, 1)) {
873879
ir->opcode = rv_insn_fuse5;
874880
ir->impl = dispatch_table[ir->opcode];
875-
ir->tailcall = true;
881+
remove_next_nth_ir(rv, ir, block, 2);
876882
return true;
877883
};
878884
}
@@ -885,12 +891,12 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
885891
if (next_ir->imm == 20 && detect_memset(rv, 2)) {
886892
ir->opcode = rv_insn_fuse5;
887893
ir->impl = dispatch_table[ir->opcode];
888-
ir->tailcall = true;
894+
remove_next_nth_ir(rv, ir, block, 2);
889895
return true;
890896
} else if (next_ir->imm == 28 && detect_memcpy(rv, 2)) {
891897
ir->opcode = rv_insn_fuse6;
892898
ir->impl = dispatch_table[ir->opcode];
893-
ir->tailcall = true;
899+
remove_next_nth_ir(rv, ir, block, 2);
894900
return true;
895901
};
896902
}
@@ -918,7 +924,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
918924
if (detect_memcpy(rv, 1)) {
919925
ir->opcode = rv_insn_fuse6;
920926
ir->impl = dispatch_table[ir->opcode];
921-
ir->tailcall = true;
927+
remove_next_nth_ir(rv, ir, block, 3);
922928
return true;
923929
};
924930
}
@@ -938,7 +944,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
938944
* Strategies are being devised to increase the number of instructions that
939945
* match the pattern, including possible instruction reordering.
940946
*/
941-
static void match_pattern(block_t *block)
947+
static void match_pattern(riscv_t *rv, block_t *block)
942948
{
943949
uint32_t i;
944950
rv_insn_t *ir;
@@ -960,23 +966,31 @@ static void match_pattern(block_t *block)
960966
ir->rs1 = next_ir->rs2;
961967
ir->impl = dispatch_table[ir->opcode];
962968
ir->tailcall = next_ir->tailcall;
969+
remove_next_nth_ir(rv, ir, block, 1);
963970
}
964971
break;
965972
case rv_insn_lui:
966973
count = 1;
967974
while (1) {
968975
if (next_ir->opcode != rv_insn_lui)
969976
break;
970-
next_ir->opcode = rv_insn_nop;
971977
count++;
972978
if (next_ir->tailcall)
973979
break;
974980
next_ir = next_ir->next;
975981
}
976-
ir->imm2 = count;
977-
ir->opcode = rv_insn_fuse1;
978-
ir->impl = dispatch_table[ir->opcode];
979-
ir->tailcall = next_ir->tailcall;
982+
if (count > 1) {
983+
ir->opcode = rv_insn_fuse1;
984+
ir->fuse = malloc(count * sizeof(opcode_fuse_t));
985+
ir->imm2 = count;
986+
memcpy(ir->fuse, ir, sizeof(opcode_fuse_t));
987+
ir->impl = dispatch_table[ir->opcode];
988+
next_ir = ir->next;
989+
for (int j = 1; j < count; j++, next_ir = next_ir->next) {
990+
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t));
991+
}
992+
remove_next_nth_ir(rv, ir, block, count - 1);
993+
}
980994
break;
981995
}
982996
break;
@@ -1055,7 +1069,7 @@ static block_t *block_find_or_translate(riscv_t *rv)
10551069
if (likely(!rv->debug_mode))
10561070
#endif
10571071
/* macro operation fusion */
1058-
match_pattern(next);
1072+
match_pattern(rv, next);
10591073
}
10601074
/* insert the block into block map */
10611075
block_insert(&rv->block_map, next);

0 commit comments

Comments
 (0)