Skip to content

Commit 9be1f5f

Browse files
committed
[RISCV] Add a combine to form masked.store from unit strided store
Add a DAG combine to form a masked.store from a masked_strided_store intrinsic whose stride is equal to the element size. This is the store analogue of PR llvm#65674. As seen in the tests, this picks up a few cases that we'd previously missed due to selection ordering: we match strided stores early, without going through the recently added generic mscatter combines, and thus weren't recognizing the unit-strided store.
1 parent 0722800 commit 9be1f5f

File tree

4 files changed

+22
-8
lines changed

4 files changed

+22
-8
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14368,6 +14368,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
1436814368
ISD::UNINDEXED, ISD::NON_EXTLOAD);
1436914369
return SDValue();
1437014370
}
14371+
case Intrinsic::riscv_masked_strided_store: {
14372+
auto *Store = cast<MemIntrinsicSDNode>(N);
14373+
SDValue Value = N->getOperand(2);
14374+
SDValue Base = N->getOperand(3);
14375+
SDValue Stride = N->getOperand(4);
14376+
SDValue Mask = N->getOperand(5);
14377+
14378+
// If the stride is equal to the element size in bytes, we can use
14379+
// a masked.store.
14380+
const unsigned ElementSize = Value.getValueType().getScalarStoreSize();
14381+
if (auto *StrideC = dyn_cast<ConstantSDNode>(Stride);
14382+
StrideC && StrideC->getZExtValue() == ElementSize)
14383+
return DAG.getMaskedStore(Store->getChain(), DL, Value, Base,
14384+
DAG.getUNDEF(XLenVT), Mask,
14385+
Store->getMemoryVT(), Store->getMemOperand(),
14386+
ISD::UNINDEXED, false);
14387+
return SDValue();
14388+
}
1437114389
case Intrinsic::riscv_vcpop:
1437214390
case Intrinsic::riscv_vcpop_mask:
1437314391
case Intrinsic::riscv_vfirst:

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11296,9 +11296,8 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs,
1129611296
define void @mscatter_unit_stride(<8 x i16> %val, ptr %base) {
1129711297
; CHECK-LABEL: mscatter_unit_stride:
1129811298
; CHECK: # %bb.0:
11299-
; CHECK-NEXT: li a1, 2
1130011299
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11301-
; CHECK-NEXT: vsse16.v v8, (a0), a1
11300+
; CHECK-NEXT: vse16.v v8, (a0)
1130211301
; CHECK-NEXT: ret
1130311302
%head = insertelement <8 x i1> poison, i1 true, i16 0
1130411303
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer
@@ -11311,9 +11310,8 @@ define void @mscatter_unit_stride_with_offset(<8 x i16> %val, ptr %base) {
1131111310
; CHECK-LABEL: mscatter_unit_stride_with_offset:
1131211311
; CHECK: # %bb.0:
1131311312
; CHECK-NEXT: addi a0, a0, 10
11314-
; CHECK-NEXT: li a1, 2
1131511313
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
11316-
; CHECK-NEXT: vsse16.v v8, (a0), a1
11314+
; CHECK-NEXT: vse16.v v8, (a0)
1131711315
; CHECK-NEXT: ret
1131811316
%head = insertelement <8 x i1> poison, i1 true, i16 0
1131911317
%allones = shufflevector <8 x i1> %head, <8 x i1> poison, <8 x i32> zeroinitializer

llvm/test/CodeGen/RISCV/rvv/mscatter-combine.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,7 @@ define void @stride_one_store(i64 %n, ptr %p) {
114114
; RV64: # %bb.0:
115115
; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma
116116
; RV64-NEXT: vmv.v.i v8, 0
117-
; RV64-NEXT: li a0, 8
118-
; RV64-NEXT: vsse64.v v8, (a1), a0
117+
; RV64-NEXT: vs1r.v v8, (a1)
119118
; RV64-NEXT: ret
120119
%step = tail call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
121120
%gep = getelementptr inbounds i64, ptr %p, <vscale x 1 x i64> %step

llvm/test/CodeGen/RISCV/rvv/strided-load-store-intrinsics.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,8 @@ define void @strided_store_i8_nostride(ptr %p, <32 x i8> %v, <32 x i1> %m) {
8989
; CHECK-LABEL: strided_store_i8_nostride:
9090
; CHECK: # %bb.0:
9191
; CHECK-NEXT: li a1, 32
92-
; CHECK-NEXT: li a2, 1
9392
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
94-
; CHECK-NEXT: vsse8.v v8, (a0), a2, v0.t
93+
; CHECK-NEXT: vse8.v v8, (a0), v0.t
9594
; CHECK-NEXT: ret
9695
call void @llvm.riscv.masked.strided.store.v32i8.p0.i64(<32 x i8> %v, ptr %p, i64 1, <32 x i1> %m)
9796
ret void

0 commit comments

Comments
 (0)