[RISCV][SelectionDAG] Sign extend splats of i32 in getConstant on RV64 #67027
Conversation
@llvm/pr-subscribers-llvm-selectiondag

Changes

We get better constant materialization if we sign extend the value to be splatted for i32 on RV64 instead of zero extending it.

Full diff: https://github.com/llvm/llvm-project/pull/67027.diff

5 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index f12db53c7f08738..cc162eb6dbca62c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1620,7 +1620,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) ==
TargetLowering::TypePromoteInteger) {
EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT);
- APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
+ APInt NewVal;
+ if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT))
+ NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits());
+ else
+ NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits());
Elt = ConstantInt::get(*getContext(), NewVal);
}
// In other cases the element type is illegal and needs to be expanded, for
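To make the effect concrete before the test diffs: the recurring case below is the i32 splat constant -7, whose element type is promoted to i64 on RV64. Here is a minimal standalone C++ sketch (plain integer arithmetic rather than LLVM's APInt; the variable names are illustrative) of the two 64-bit representatives the promoted element can take:

```cpp
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  int32_t Splat = -7;

  // zextOrTrunc result: 0x00000000FFFFFFF9. RV64 needs a three-instruction
  // sequence for this (li a0, 1; slli a0, a0, 32; addi a0, a0, -7 in the
  // old RV64 test output below).
  uint64_t ZExt = static_cast<uint32_t>(Splat);

  // sextOrTrunc result: 0xFFFFFFFFFFFFFFF9, i.e. -7, which a single
  // `li a0, -7` materializes.
  uint64_t SExt = static_cast<uint64_t>(static_cast<int64_t>(Splat));

  std::printf("zext: 0x%016" PRIx64 "\n", ZExt);
  std::printf("sext: 0x%016" PRIx64 "\n", SExt);
}
```

Both representatives agree in the low 32 bits, which is all that e32 operations like vmulh.vx read from the scalar, so the choice only affects materialization cost; isSExtCheaperThanZExt is what lets a target pick the cheaper one.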
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
index e3a878052ee19b6..98bc4081b3a34e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
@@ -14,26 +14,15 @@
; a constant SPLAT_VECTOR didn't follow suit.
define <2 x i16> @fixedlen(<2 x i32> %x) {
-; RV32-LABEL: fixedlen:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 16
-; RV32-NEXT: lui a0, 1048568
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT: vnsrl.wi v8, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: fixedlen:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v8, v8, 16
-; RV64-NEXT: lui a0, 131071
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: fixedlen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 16
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: ret
%v41 = insertelement <2 x i32> poison, i32 16, i32 0
%v42 = shufflevector <2 x i32> %v41, <2 x i32> poison, <2 x i32> zeroinitializer
%v43 = lshr <2 x i32> %x, %v42
@@ -63,3 +52,6 @@ define <vscale x 2 x i16> @scalable(<vscale x 2 x i32> %x) {
%v48 = and <vscale x 2 x i16> %v44, %v47
ret <vscale x 2 x i16> %v48
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
index 443fe93a618c50b..56d98981947c3c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
@@ -32,13 +32,11 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: addiw a1, a0, -1
; CHECK-NEXT: vsll.vv v10, v8, v9
; CHECK-NEXT: vsra.vv v9, v10, v9
; CHECK-NEXT: vmsne.vv v8, v8, v9
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: slli a0, a0, 31
+; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0
; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
@@ -116,13 +114,11 @@ define <vscale x 4 x i32> @vec_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32>
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: addiw a1, a0, -1
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: slli a0, a0, 31
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
index 06f5d39622da834..69191ebb9256d79 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
@@ -60,21 +60,12 @@ define <vscale x 1 x i32> @vmulh_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %x) {
}
define <vscale x 1 x i32> @vmulh_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%vb = sext <vscale x 1 x i32> %splat1 to <vscale x 1 x i64>
@@ -141,21 +132,12 @@ define <vscale x 2 x i32> @vmulh_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %x) {
}
define <vscale x 2 x i32> @vmulh_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%vb = sext <vscale x 2 x i32> %splat1 to <vscale x 2 x i64>
@@ -222,21 +204,12 @@ define <vscale x 4 x i32> @vmulh_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %x) {
}
define <vscale x 4 x i32> @vmulh_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%vb = sext <vscale x 4 x i32> %splat1 to <vscale x 4 x i64>
@@ -303,21 +276,12 @@ define <vscale x 8 x i32> @vmulh_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %x) {
}
define <vscale x 8 x i32> @vmulh_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%vb = sext <vscale x 8 x i32> %splat1 to <vscale x 8 x i64>
@@ -348,3 +312,6 @@ define <vscale x 8 x i32> @vmulh_vi_nxv8i32_1(<vscale x 8 x i32> %va) {
%vf = trunc <vscale x 8 x i64> %ve to <vscale x 8 x i32>
ret <vscale x 8 x i32> %vf
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
index 186d56b1293dbef..5354c17fd2a7d7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
@@ -37,21 +37,12 @@ define <vscale x 1 x i32> @vmulhu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %x) {
}
define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%vb = zext <vscale x 1 x i32> %splat1 to <vscale x 1 x i64>
@@ -124,21 +115,12 @@ define <vscale x 2 x i32> @vmulhu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %x) {
}
define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%vb = zext <vscale x 2 x i32> %splat1 to <vscale x 2 x i64>
@@ -211,21 +193,12 @@ define <vscale x 4 x i32> @vmulhu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %x) {
}
define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%vb = zext <vscale x 4 x i32> %splat1 to <vscale x 4 x i64>
@@ -298,21 +271,12 @@ define <vscale x 8 x i32> @vmulhu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %x) {
}
define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%vb = zext <vscale x 8 x i32> %splat1 to <vscale x 8 x i64>
This seems reasonable, but I'd like a second opinion from someone more knowledgeable. Maybe @topperc?
I'm not opposed to this patch. The other thought I have would be to add all of the vector .vx pseudoinstructions to hasAllNBitUsers in RISCVISelDAGToDAG.cpp. When we do constant materialization we call that to see if the upper bits of the immediate are used.
Would we need a way to tell if the user is being used as an "element" operand in the pseudo, e.g. so we don't assume the x in vslidedown.vx is truncated to SEW?
Yes, but that's already something that function does for other instructions. For example, the pointer operand of the scalar store SW demands all bits but the data operand only demands the lower 32 bits.
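For readers following along, here is a toy standalone model of the per-operand distinction described above (the opcode names and helper are hypothetical, not the actual hasAllNBitUsers code in RISCVISelDAGToDAG.cpp):

```cpp
// Toy model: how many low bits of operand `OpNo` an instruction reads
// depends on the operand's role, not just the opcode.
enum class Opcode { StoreWord /* sw */, VSlideDownVX /* vslidedown.vx */ };

unsigned demandedLowBits(Opcode Op, unsigned OpNo) {
  switch (Op) {
  case Opcode::StoreWord:
    // sw: the stored data (operand 0 in this model) only needs its low
    // 32 bits, but the base-pointer operand needs all 64.
    return OpNo == 0 ? 32 : 64;
  case Opcode::VSlideDownVX:
    // The scalar is a slide index, not an SEW-sized element, so it must
    // not be assumed truncated to SEW.
    return 64;
  }
  return 64; // conservative default: all bits demanded
}
```

A 64-bit constant can then be re-materialized in a cheaper, differently-extended form whenever every use demands at most its low 32 bits.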
Trying this out locally, this seems to catch more cases. Will work on a PR for it.
Opened #67419. Would it still be useful to get this PR in? I think it would be good to improve the target independent case, even if #67419 already catches these cases in the RISC-V backend.
We get better constant materialization if we sign extend the value to be splatted for i32 on RV64 instead of zero extending it.
(force-pushed from d87b388 to 9af4b71)
LGTM. I'd originally planned to defer to @topperc, but with his sole comment being not opposed and a week of silence, let's just go ahead here. (edit: Possibly important context - this is essentially NFC in practice at this point, and the lack of test changes makes it a lot easier to convince myself this is safe.)
LGTM