@@ -398,29 +398,18 @@ enum {
398
398
#undef _
399
399
};
400
400
401
- /* Return the instruction reached by following ->next n times from ir (ir itself when n <= 0).
402
- * ->next is followed without a NULL check. FIXME: this walks the linked list linearly; we may want to find a better approach. */
403
- FORCE_INLINE rv_insn_t * next_nth_insn (rv_insn_t * ir , int32_t n )
404
- {
405
- rv_insn_t * tmp = ir ;
406
- for (int32_t iter = 0 ; iter < n ; iter ++ )
407
- tmp = tmp -> next ; /* one hop per iteration */
408
- return tmp ;
409
- }
410
-
411
401
/* multiple lui
 *
 * Executes a run of lui instructions that match_pattern has fused into a
 * single IR node (presumably the handler dispatched for rv_insn_fuse1 --
 * confirm against dispatch_table). ir->imm2 is the number of fused
 * instructions: csr_cycle is advanced by imm2, each instruction's imm is
 * written to rv->X[rd], and PC is advanced by imm2 * insn_len.
 *
 * Returns true when RVOP_NO_NEXT(ir) holds; otherwise returns the result of
 * invoking the next instruction's impl.
 */
412
402
static bool do_fuse1 (riscv_t * rv , rv_insn_t * ir )
413
403
{
414
404
rv -> csr_cycle += ir -> imm2 ;
415
- int i ;
416
- rv_insn_t * cur_ir ;
417
- for (i = 0 , cur_ir = ir ; i < ir -> imm2 ; i ++ , cur_ir = cur_ir -> next ) { /* old form: read rd/imm from the imm2 IR nodes starting at ir */
418
- rv -> X [cur_ir -> rd ] = cur_ir -> imm ;
405
+ opcode_fuse_t * fuse = ir -> fuse ; /* new form: array of imm2 records allocated and filled in match_pattern */
406
+ for (int i = 0 ; i < ir -> imm2 ; i ++ ) {
407
+ rv -> X [fuse [i ].rd ] = fuse [i ].imm ;
419
408
}
420
409
rv -> PC += ir -> imm2 * ir -> insn_len ;
421
410
if (unlikely (RVOP_NO_NEXT (ir )))
422
411
return true;
423
- const rv_insn_t * next = next_nth_insn ( ir , ir -> imm2 ) ; /* old form: hop over the fused nodes still present in the list */
412
+ const rv_insn_t * next = ir -> next ; /* new form: match_pattern unlinks the fused nodes via remove_next_nth_ir, so ->next is the successor */
424
413
MUST_TAIL return next -> impl (rv , next );
425
414
}
426
415
@@ -433,7 +422,7 @@ static bool do_fuse2(riscv_t *rv, rv_insn_t *ir)
433
422
rv -> PC += 2 * ir -> insn_len ;
434
423
if (unlikely (RVOP_NO_NEXT (ir )))
435
424
return true;
436
- const rv_insn_t * next = next_nth_insn ( ir , 2 ) ;
425
+ const rv_insn_t * next = ir -> next ;
437
426
MUST_TAIL return next -> impl (rv , next );
438
427
}
439
428
@@ -456,7 +445,7 @@ static bool do_fuse3(riscv_t *rv, rv_insn_t *ir)
456
445
rv -> PC += ir -> imm2 * ir -> insn_len ;
457
446
if (unlikely (RVOP_NO_NEXT (ir )))
458
447
return true;
459
- const rv_insn_t * next = next_nth_insn ( ir , ir -> imm2 ) ;
448
+ const rv_insn_t * next = ir -> next ;
460
449
MUST_TAIL return next -> impl (rv , next );
461
450
}
462
451
@@ -479,7 +468,7 @@ static bool do_fuse4(riscv_t *rv, rv_insn_t *ir)
479
468
rv -> PC += ir -> imm2 * ir -> insn_len ;
480
469
if (unlikely (RVOP_NO_NEXT (ir )))
481
470
return true;
482
- const rv_insn_t * next = next_nth_insn ( ir , ir -> imm2 ) ;
471
+ const rv_insn_t * next = ir -> next ;
483
472
MUST_TAIL return next -> impl (rv , next );
484
473
}
485
474
@@ -622,7 +611,7 @@ static void block_translate(riscv_t *rv, block_map_t *map, block_t *block)
622
611
for (int j = 1; j < count; j++, next_ir = next_ir->next) { \
623
612
memcpy(ir->fuse + j, next_ir, sizeof(opcode_fuse_t)); \
624
613
} \
625
- ir->tailcall = next_ir->tailcall; \
614
+ remove_next_nth_ir(rv, ir, block, count - 1); \
626
615
}
627
616
628
617
static bool detect_memset (riscv_t * rv , int lib )
@@ -851,6 +840,23 @@ static bool detect_memcpy(riscv_t *rv, int lib)
851
840
return true;
852
841
}
853
842
843
/* Unlink and free the n IR nodes that immediately follow ir in block.
 *
 * rv:    supplies the pool (rv->block_map.block_ir_mp) the nodes are
 *        returned to with mpool_free.
 * ir:    node that is kept; its ->next is advanced past each removed node.
 * block: its n_insn is reduced by n, and its ir_tail is set to ir when ir
 *        ends up with no successor (ir->tailcall is then set to true too).
 * n:     number of successor nodes to remove.
 *
 * NOTE(review): ir->next is dereferenced on every iteration without a NULL
 * check, so the caller has to guarantee that ir has at least n successors.
 * NOTE(review): n is uint8_t while some callers pass `count - 1`; confirm
 * that count can never exceed 256, otherwise the value is truncated.
 */
+ FORCE_INLINE void remove_next_nth_ir (riscv_t * rv ,
844
+ rv_insn_t * ir ,
845
+ block_t * block ,
846
+ uint8_t n )
847
+ {
848
+ for (uint8_t i = 0 ; i < n ; i ++ ) {
849
+ rv_insn_t * next = ir -> next ; /* remember the node being dropped before relinking */
850
+ ir -> next = ir -> next -> next ;
851
+ mpool_free (rv -> block_map .block_ir_mp , next );
852
+ }
853
+ if (!ir -> next ) { /* ir is now the last node of the list */
854
+ block -> ir_tail = ir ;
855
+ ir -> tailcall = true;
856
+ }
857
+ block -> n_insn -= n ;
858
+ }
859
+
854
860
static bool libc_substitute (riscv_t * rv , block_t * block )
855
861
{
856
862
rv_insn_t * ir = block -> ir_head , * next_ir = NULL ;
@@ -872,7 +878,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
872
878
if (detect_memset (rv , 1 )) {
873
879
ir -> opcode = rv_insn_fuse5 ;
874
880
ir -> impl = dispatch_table [ir -> opcode ];
875
- ir -> tailcall = true ;
881
+ remove_next_nth_ir ( rv , ir , block , 2 ) ;
876
882
return true;
877
883
};
878
884
}
@@ -885,12 +891,12 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
885
891
if (next_ir -> imm == 20 && detect_memset (rv , 2 )) {
886
892
ir -> opcode = rv_insn_fuse5 ;
887
893
ir -> impl = dispatch_table [ir -> opcode ];
888
- ir -> tailcall = true ;
894
+ remove_next_nth_ir ( rv , ir , block , 2 ) ;
889
895
return true;
890
896
} else if (next_ir -> imm == 28 && detect_memcpy (rv , 2 )) {
891
897
ir -> opcode = rv_insn_fuse6 ;
892
898
ir -> impl = dispatch_table [ir -> opcode ];
893
- ir -> tailcall = true ;
899
+ remove_next_nth_ir ( rv , ir , block , 2 ) ;
894
900
return true;
895
901
};
896
902
}
@@ -918,7 +924,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
918
924
if (detect_memcpy (rv , 1 )) {
919
925
ir -> opcode = rv_insn_fuse6 ;
920
926
ir -> impl = dispatch_table [ir -> opcode ];
921
- ir -> tailcall = true ;
927
+ remove_next_nth_ir ( rv , ir , block , 3 ) ;
922
928
return true;
923
929
};
924
930
}
@@ -938,7 +944,7 @@ static bool libc_substitute(riscv_t *rv, block_t *block)
938
944
* Strategies are being devised to increase the number of instructions that
939
945
* match the pattern, including possible instruction reordering.
940
946
*/
941
- static void match_pattern (block_t * block )
947
+ static void match_pattern (riscv_t * rv , block_t * block )
942
948
{
943
949
uint32_t i ;
944
950
rv_insn_t * ir ;
@@ -960,23 +966,31 @@ static void match_pattern(block_t *block)
960
966
ir -> rs1 = next_ir -> rs2 ;
961
967
ir -> impl = dispatch_table [ir -> opcode ];
962
968
ir -> tailcall = next_ir -> tailcall ;
969
+ remove_next_nth_ir (rv , ir , block , 1 );
963
970
}
964
971
break ;
965
972
case rv_insn_lui :
966
973
count = 1 ;
967
974
while (1 ) {
968
975
if (next_ir -> opcode != rv_insn_lui )
969
976
break ;
970
- next_ir -> opcode = rv_insn_nop ;
971
977
count ++ ;
972
978
if (next_ir -> tailcall )
973
979
break ;
974
980
next_ir = next_ir -> next ;
975
981
}
976
- ir -> imm2 = count ;
977
- ir -> opcode = rv_insn_fuse1 ;
978
- ir -> impl = dispatch_table [ir -> opcode ];
979
- ir -> tailcall = next_ir -> tailcall ;
982
+ if (count > 1 ) {
983
+ ir -> opcode = rv_insn_fuse1 ;
984
+ ir -> fuse = malloc (count * sizeof (opcode_fuse_t ));
985
+ ir -> imm2 = count ;
986
+ memcpy (ir -> fuse , ir , sizeof (opcode_fuse_t ));
987
+ ir -> impl = dispatch_table [ir -> opcode ];
988
+ next_ir = ir -> next ;
989
+ for (int j = 1 ; j < count ; j ++ , next_ir = next_ir -> next ) {
990
+ memcpy (ir -> fuse + j , next_ir , sizeof (opcode_fuse_t ));
991
+ }
992
+ remove_next_nth_ir (rv , ir , block , count - 1 );
993
+ }
980
994
break ;
981
995
}
982
996
break ;
@@ -1055,7 +1069,7 @@ static block_t *block_find_or_translate(riscv_t *rv)
1055
1069
if (likely (!rv -> debug_mode ))
1056
1070
#endif
1057
1071
/* macro operation fusion */
1058
- match_pattern (next );
1072
+ match_pattern (rv , next );
1059
1073
}
1060
1074
/* insert the block into block map */
1061
1075
block_insert (& rv -> block_map , next );
0 commit comments