Skip to content

Commit 44892e5

Browse files
committed
[AArch64][SVE] Avoid transfer to GPRs for fp -> int -> fp conversions
When NEON is not available, use the SVE variants of FCVTZS, FCVTZU, UCVTF, and SCVTF for fp -> int -> fp conversions to avoid moving values to/from GPRs, which may be expensive. Note: with +sme2p2, the single-element vector NEON variants of these instructions could be used instead (but that feature is not implemented yet). Follow-up to llvm#112213.
1 parent 1fe64fe commit 44892e5

File tree

2 files changed

+65
-16
lines changed

2 files changed

+65
-16
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2421,6 +2421,41 @@ let Predicates = [HasSVEorSME] in {
24212421
defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", AArch64fsqrt_mt>;
24222422
} // End HasSVEorSME
24232423

2424+
// Helper for creating fp -> int -> fp conversions using SVE.
2425+
class sve_fp_int_fp_cvt<Instruction PTRUE, Instruction FROM_INT, Instruction TO_INT, SubRegIndex sub>
2426+
: OutPatFrag<(ops node: $Rn),
2427+
(EXTRACT_SUBREG
2428+
(FROM_INT (IMPLICIT_DEF), (PTRUE 1),
2429+
(TO_INT (IMPLICIT_DEF), (PTRUE 1),
2430+
(INSERT_SUBREG (IMPLICIT_DEF), $Rn, sub))), sub)>;
2431+
2432+
// Some float -> int -> float conversion patterns where we want to keep the int
2433+
// values in FP registers using the SVE instructions to avoid costly GPR <-> FPR
2434+
// register transfers. Only used when NEON is not available (e.g. in streaming
2435+
// functions).
2436+
// TODO: When +sme2p2 is available single-element vectors should be preferred.
2437+
def HasNoNEON : Predicate<"!Subtarget->isNeonAvailable()">;
2438+
let Predicates = [HasSVEorSME, HasNoNEON] in {
2439+
def : Pat<
2440+
(f64 (sint_to_fp (i64 (fp_to_sint f64:$Rn)))),
2441+
(sve_fp_int_fp_cvt<PTRUE_D, SCVTF_ZPmZ_DtoD, FCVTZS_ZPmZ_DtoD, dsub> $Rn)>;
2442+
def : Pat<
2443+
(f64 (uint_to_fp (i64 (fp_to_uint f64:$Rn)))),
2444+
(sve_fp_int_fp_cvt<PTRUE_D, UCVTF_ZPmZ_DtoD, FCVTZU_ZPmZ_DtoD, dsub> $Rn)>;
2445+
def : Pat<
2446+
(f32 (sint_to_fp (i32 (fp_to_sint f32:$Rn)))),
2447+
(sve_fp_int_fp_cvt<PTRUE_S, SCVTF_ZPmZ_StoS, FCVTZS_ZPmZ_StoS, ssub> $Rn)>;
2448+
def : Pat<
2449+
(f32 (uint_to_fp (i32 (fp_to_uint f32:$Rn)))),
2450+
(sve_fp_int_fp_cvt<PTRUE_S, UCVTF_ZPmZ_StoS, FCVTZU_ZPmZ_StoS, ssub> $Rn)>;
2451+
def : Pat<
2452+
(f16 (sint_to_fp (i32 (fp_to_sint f16:$Rn)))),
2453+
(sve_fp_int_fp_cvt<PTRUE_H, SCVTF_ZPmZ_HtoH, FCVTZS_ZPmZ_HtoH, hsub> $Rn)>;
2454+
def : Pat<
2455+
(f16 (uint_to_fp (i32 (fp_to_uint f16:$Rn)))),
2456+
(sve_fp_int_fp_cvt<PTRUE_H, UCVTF_ZPmZ_HtoH, FCVTZU_ZPmZ_HtoH, hsub> $Rn)>;
2457+
} // End HasSVEorSME, HasNoNEON
2458+
24242459
let Predicates = [HasBF16, HasSVEorSME] in {
24252460
defm BFDOT_ZZZ : sve_float_dot<0b1, 0b0, ZPR32, ZPR16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot>;
24262461
defm BFDOT_ZZI : sve_float_dot_indexed<0b1, 0b00, ZPR16, ZPR3b16, "bfdot", nxv8bf16, int_aarch64_sve_bfdot_lane_v2>;

llvm/test/CodeGen/AArch64/sve-streaming-mode-cvt-fp-int-fp.ll

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@ target triple = "aarch64-unknown-linux-gnu"
88
define double @t1(double %x) {
99
; CHECK-LABEL: t1:
1010
; CHECK: // %bb.0: // %entry
11-
; CHECK-NEXT: fcvtzs x8, d0
12-
; CHECK-NEXT: scvtf d0, x8
11+
; CHECK-NEXT: ptrue p0.d, vl1
12+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
13+
; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
14+
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
15+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
1316
; CHECK-NEXT: ret
1417
;
1518
; USE-NEON-NO-GPRS-LABEL: t1:
@@ -26,8 +29,11 @@ entry:
2629
define float @t2(float %x) {
2730
; CHECK-LABEL: t2:
2831
; CHECK: // %bb.0: // %entry
29-
; CHECK-NEXT: fcvtzs w8, s0
30-
; CHECK-NEXT: scvtf s0, w8
32+
; CHECK-NEXT: ptrue p0.s, vl1
33+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
34+
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
35+
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
36+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
3137
; CHECK-NEXT: ret
3238
;
3339
; USE-NEON-NO-GPRS-LABEL: t2:
@@ -44,10 +50,11 @@ entry:
4450
define half @t3(half %x) {
4551
; CHECK-LABEL: t3:
4652
; CHECK: // %bb.0: // %entry
47-
; CHECK-NEXT: fcvt s0, h0
48-
; CHECK-NEXT: fcvtzs w8, s0
49-
; CHECK-NEXT: scvtf s0, w8
50-
; CHECK-NEXT: fcvt h0, s0
53+
; CHECK-NEXT: ptrue p0.h, vl1
54+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
55+
; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
56+
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
57+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
5158
; CHECK-NEXT: ret
5259
;
5360
; USE-NEON-NO-GPRS-LABEL: t3:
@@ -66,8 +73,11 @@ entry:
6673
define double @t4(double %x) {
6774
; CHECK-LABEL: t4:
6875
; CHECK: // %bb.0: // %entry
69-
; CHECK-NEXT: fcvtzu x8, d0
70-
; CHECK-NEXT: ucvtf d0, x8
76+
; CHECK-NEXT: ptrue p0.d, vl1
77+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
78+
; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
79+
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
80+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
7181
; CHECK-NEXT: ret
7282
;
7383
; USE-NEON-NO-GPRS-LABEL: t4:
@@ -84,8 +94,11 @@ entry:
8494
define float @t5(float %x) {
8595
; CHECK-LABEL: t5:
8696
; CHECK: // %bb.0: // %entry
87-
; CHECK-NEXT: fcvtzu w8, s0
88-
; CHECK-NEXT: ucvtf s0, w8
97+
; CHECK-NEXT: ptrue p0.s, vl1
98+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0
99+
; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
100+
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
101+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0
89102
; CHECK-NEXT: ret
90103
;
91104
; USE-NEON-NO-GPRS-LABEL: t5:
@@ -102,10 +115,11 @@ entry:
102115
define half @t6(half %x) {
103116
; CHECK-LABEL: t6:
104117
; CHECK: // %bb.0: // %entry
105-
; CHECK-NEXT: fcvt s0, h0
106-
; CHECK-NEXT: fcvtzu w8, s0
107-
; CHECK-NEXT: ucvtf s0, w8
108-
; CHECK-NEXT: fcvt h0, s0
118+
; CHECK-NEXT: ptrue p0.h, vl1
119+
; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0
120+
; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
121+
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
122+
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0
109123
; CHECK-NEXT: ret
110124
;
111125
; USE-NEON-NO-GPRS-LABEL: t6:

0 commit comments

Comments
 (0)