Skip to content

Commit 1ebe16b

Browse files
[RISCV] Add VL optimization related tests
These tests are good candidates for VL optimization. This is a pre-commit for PR #108640, but they could probably also be improved by the peephole VL optimizations.
1 parent 6b78ea8 commit 1ebe16b

File tree

3 files changed

+269
-0
lines changed

3 files changed

+269
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
3+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
4+
5+
declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen)
6+
declare <vscale x 4 x i32> @llvm.riscv.vrgather.vv.nxv4i32.iXLen(
7+
<vscale x 4 x i32>,
8+
<vscale x 4 x i32>,
9+
<vscale x 4 x i32>,
10+
iXLen)
11+
12+
declare <vscale x 4 x i32> @llvm.riscv.vslidedown.nxv4i32(
13+
<vscale x 4 x i32>,
14+
<vscale x 4 x i32>,
15+
iXLen,
16+
iXLen,
17+
iXLen);
18+
19+
declare <vscale x 4 x i32> @llvm.riscv.vslide1down.nxv4i32.i32(
20+
<vscale x 4 x i32>,
21+
<vscale x 4 x i32>,
22+
i32,
23+
iXLen);
24+
25+
define <vscale x 4 x i32> @vrgather(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
26+
; CHECK-LABEL: vrgather:
27+
; CHECK: # %bb.0:
28+
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
29+
; CHECK-NEXT: vadd.vv v12, v10, v12
30+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
31+
; CHECK-NEXT: vrgather.vv v8, v12, v10
32+
; CHECK-NEXT: ret
33+
%v = add <vscale x 4 x i32> %a, %b
34+
%w = call <vscale x 4 x i32> @llvm.riscv.vrgather.vv.nxv4i32.iXLen(
35+
<vscale x 4 x i32> poison,
36+
<vscale x 4 x i32> %v,
37+
<vscale x 4 x i32> %a,
38+
iXLen %vl1)
39+
40+
ret <vscale x 4 x i32> %w
41+
}
42+
43+
define <vscale x 4 x i32> @vslidedown(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, iXLen %2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
44+
; CHECK-LABEL: vslidedown:
45+
; CHECK: # %bb.0: # %entry
46+
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
47+
; CHECK-NEXT: vadd.vv v10, v12, v14
48+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
49+
; CHECK-NEXT: vslidedown.vx v8, v10, a0
50+
; CHECK-NEXT: ret
51+
entry:
52+
%v = add <vscale x 4 x i32> %a, %b
53+
%w = call <vscale x 4 x i32> @llvm.riscv.vslidedown.nxv4i32(
54+
<vscale x 4 x i32> %0,
55+
<vscale x 4 x i32> %v,
56+
iXLen %2,
57+
iXLen %2,
58+
iXLen 1)
59+
60+
ret <vscale x 4 x i32> %w
61+
}
62+
63+
define <vscale x 4 x i32> @vslide1down(<vscale x 4 x i32> %0, i32 %1, iXLen %2, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) nounwind {
64+
; CHECK-LABEL: vslide1down:
65+
; CHECK: # %bb.0: # %entry
66+
; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma
67+
; CHECK-NEXT: vadd.vv v8, v10, v12
68+
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
69+
; CHECK-NEXT: vslide1down.vx v8, v8, a0
70+
; CHECK-NEXT: ret
71+
entry:
72+
%v = add <vscale x 4 x i32> %a, %b
73+
%w = call <vscale x 4 x i32> @llvm.riscv.vslide1down.nxv4i32.i32(
74+
<vscale x 4 x i32> poison,
75+
<vscale x 4 x i32> %v,
76+
i32 %1,
77+
iXLen %2)
78+
79+
ret <vscale x 4 x i32> %w
80+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s
3+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvl512b -verify-machineinstrs | FileCheck %s
4+
5+
define <2 x i32> @vdot_lane_s32(<2 x i32> noundef %var_1, <8 x i8> noundef %var_3, <8 x i8> noundef %var_5, <8 x i16> %x) {
6+
; CHECK-LABEL: vdot_lane_s32:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
9+
; CHECK-NEXT: vnsrl.wi v8, v11, 0
10+
; CHECK-NEXT: vnsrl.wi v9, v11, 16
11+
; CHECK-NEXT: vwadd.vv v10, v8, v9
12+
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
13+
; CHECK-NEXT: vnsrl.wi v8, v10, 0
14+
; CHECK-NEXT: li a0, 32
15+
; CHECK-NEXT: vnsrl.wx v9, v10, a0
16+
; CHECK-NEXT: vadd.vv v8, v8, v9
17+
; CHECK-NEXT: ret
18+
entry:
19+
%a = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20+
%b = shufflevector <8 x i16> %x, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
21+
%c = sext <4 x i16> %a to <4 x i32>
22+
%d = sext <4 x i16> %b to <4 x i32>
23+
%e = add nsw <4 x i32> %c, %d
24+
%z10 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
25+
%z11 = shufflevector <4 x i32> %e, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
26+
%y12 = add <2 x i32> %z10, %z11
27+
ret <2 x i32> %y12
28+
}
29+
30+
declare <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
31+
<vscale x 2 x i16>,
32+
<vscale x 2 x i32>,
33+
<vscale x 2 x i16>,
34+
iXLen);
35+
36+
define <vscale x 2 x i16> @intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
37+
; CHECK-LABEL: intrinsic_vnsrl_wv_nxv2i16_nxv2i32_nxv2i16:
38+
; CHECK: # %bb.0: # %entry
39+
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
40+
; CHECK-NEXT: vwadd.vv v10, v8, v9
41+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
42+
; CHECK-NEXT: vnsrl.wv v8, v10, v12
43+
; CHECK-NEXT: ret
44+
entry:
45+
%c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
46+
%d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
47+
%v1 = add <vscale x 2 x i32> %c, %d
48+
%x = call <vscale x 2 x i16> @llvm.riscv.vnsrl.nxv2i16.nxv2i32.nxv2i16(
49+
<vscale x 2 x i16> undef,
50+
<vscale x 2 x i32> %v1,
51+
<vscale x 2 x i16> %z,
52+
iXLen %2)
53+
54+
ret <vscale x 2 x i16> %x
55+
}
56+
57+
declare <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
58+
<vscale x 2 x i16>,
59+
<vscale x 2 x i32>,
60+
<vscale x 2 x i16>,
61+
iXLen, iXLen);
62+
63+
define <vscale x 2 x i16> @vnclip(<vscale x 2 x i16> %a, <vscale x 2 x i16> %b, iXLen %2, <vscale x 2 x i32> %3, <vscale x 2 x i32> %4, <vscale x 2 x i16> %z) nounwind {
64+
; CHECK-LABEL: vnclip:
65+
; CHECK: # %bb.0: # %entry
66+
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
67+
; CHECK-NEXT: vwadd.vv v10, v8, v9
68+
; CHECK-NEXT: csrwi vxrm, 0
69+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
70+
; CHECK-NEXT: vnclip.wv v8, v10, v12
71+
; CHECK-NEXT: ret
72+
entry:
73+
%c = sext <vscale x 2 x i16> %a to <vscale x 2 x i32>
74+
%d = sext <vscale x 2 x i16> %b to <vscale x 2 x i32>
75+
%v1 = add <vscale x 2 x i32> %c, %d
76+
%x = call <vscale x 2 x i16> @llvm.riscv.vnclip.nxv2i16.nxv2i32.nxv2i16(
77+
<vscale x 2 x i16> undef,
78+
<vscale x 2 x i32> %v1,
79+
<vscale x 2 x i16> %z,
80+
iXLen 0, iXLen %2)
81+
82+
ret <vscale x 2 x i16> %x
83+
}

llvm/test/CodeGen/RISCV/rvv/vl-opt.ll

+106
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
3+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
4+
5+
declare <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, iXLen)
6+
7+
define <vscale x 4 x i32> @different_imm_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
8+
; CHECK-LABEL: different_imm_vl_with_ta:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
11+
; CHECK-NEXT: vadd.vv v8, v10, v12
12+
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
13+
; CHECK-NEXT: vadd.vv v8, v8, v10
14+
; CHECK-NEXT: ret
15+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
16+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 4)
17+
ret <vscale x 4 x i32> %w
18+
}
19+
20+
; Not beneficial to propagate VL since VL is larger on the use side.
21+
define <vscale x 4 x i32> @different_imm_vl_with_ta_larger_vl(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
22+
; CHECK-LABEL: different_imm_vl_with_ta_larger_vl:
23+
; CHECK: # %bb.0:
24+
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
25+
; CHECK-NEXT: vadd.vv v8, v10, v12
26+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma
27+
; CHECK-NEXT: vadd.vv v8, v8, v10
28+
; CHECK-NEXT: ret
29+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
30+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen 5)
31+
ret <vscale x 4 x i32> %w
32+
}
33+
34+
define <vscale x 4 x i32> @different_imm_reg_vl_with_ta(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
35+
; CHECK-LABEL: different_imm_reg_vl_with_ta:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
38+
; CHECK-NEXT: vadd.vv v8, v10, v12
39+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
40+
; CHECK-NEXT: vadd.vv v8, v8, v10
41+
; CHECK-NEXT: ret
42+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 4)
43+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
44+
ret <vscale x 4 x i32> %w
45+
}
46+
47+
48+
; Not beneficial to propagate VL since VL is already one.
49+
define <vscale x 4 x i32> @different_imm_vl_with_ta_1(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
50+
; CHECK-LABEL: different_imm_vl_with_ta_1:
51+
; CHECK: # %bb.0:
52+
; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
53+
; CHECK-NEXT: vadd.vv v8, v10, v12
54+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
55+
; CHECK-NEXT: vadd.vv v8, v8, v10
56+
; CHECK-NEXT: ret
57+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 1)
58+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a, iXLen %vl1)
59+
ret <vscale x 4 x i32> %w
60+
}
61+
62+
; Propagate %vl2 to the last instruction since it may be smaller than %vl1;
63+
; it's still safe even if %vl2 is larger than %vl1, because the rest of the
64+
; vector is an undefined value.
65+
define <vscale x 4 x i32> @different_vl_with_ta(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
66+
; CHECK-LABEL: different_vl_with_ta:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma
69+
; CHECK-NEXT: vadd.vv v10, v8, v10
70+
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
71+
; CHECK-NEXT: vadd.vv v8, v10, v8
72+
; CHECK-NEXT: ret
73+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
74+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
75+
ret <vscale x 4 x i32> %w
76+
}
77+
78+
; Test case to make sure VL won't propagate if using tail-undisturbed policy.
79+
define <vscale x 4 x i32> @different_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
80+
; CHECK-LABEL: different_vl_with_tu:
81+
; CHECK: # %bb.0:
82+
; CHECK-NEXT: vmv2r.v v14, v10
83+
; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma
84+
; CHECK-NEXT: vadd.vv v14, v10, v12
85+
; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma
86+
; CHECK-NEXT: vadd.vv v8, v14, v10
87+
; CHECK-NEXT: ret
88+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1)
89+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen %vl2)
90+
ret <vscale x 4 x i32> %w
91+
}
92+
93+
; Test case to make sure VL won't propagate if using tail-undisturbed policy.
94+
define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl1, iXLen %vl2) {
95+
; CHECK-LABEL: different_imm_vl_with_tu:
96+
; CHECK: # %bb.0:
97+
; CHECK-NEXT: vmv2r.v v14, v10
98+
; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
99+
; CHECK-NEXT: vadd.vv v14, v10, v12
100+
; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma
101+
; CHECK-NEXT: vadd.vv v8, v14, v10
102+
; CHECK-NEXT: ret
103+
%v = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen 5)
104+
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
105+
ret <vscale x 4 x i32> %w
106+
}

0 commit comments

Comments
 (0)