
Conversation

preames (Collaborator) commented Jul 15, 2025

This adds the analogous handling we use for the shuffle lowering to the deinterleaveN intrinsic path.
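To illustrate, here is a rough sketch of the IR-level rewrite this enables, modeled on the factor-4 i8 test below; the value names, the RV64 i64 stride type, and the EVL computation are illustrative assumptions, not code copied from the patch.

Before, when only field 0 of the deinterleave is used:

    %vec = load <vscale x 32 x i8>, ptr %p
    %d = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
    %d0 = extractvalue { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } %d, 0

After, on a target without optimized segment load/store for this factor:

    ; stride = Factor * element size = 4 * 1 bytes; offset = Idx * element size = 0
    %vscale = call i32 @llvm.vscale.i32()
    %evl = mul i32 %vscale, 8     ; element count of <vscale x 8 x i8>
    %d0 = call <vscale x 8 x i8> @llvm.experimental.vp.strided.load.nxv8i8.p0.i64(ptr align 1 %p, i64 4, <vscale x 8 x i1> splat (i1 true), i32 %evl)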

llvmbot (Member) commented Jul 15, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

This adds the analogous handling we use for the shuffle lowering to the deinterleaveN intrinsic path.


Full diff: https://github.com/llvm/llvm-project/pull/148892.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp (+29-5)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll (+7-4)
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 39603b92cc2f7..7dac87b07e990 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -243,20 +243,44 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
   assert(LI->isSimple());
   IRBuilder<> Builder(LI);
 
-  Value *FirstActive =
-      *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
-  VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
+  auto FirstActiveItr =
+      llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
+  VectorType *ResVTy = cast<VectorType>((*FirstActiveItr)->getType());
 
   const DataLayout &DL = LI->getDataLayout();
-
   if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
                                     LI->getPointerAddressSpace(), DL))
     return false;
 
-  Value *Return;
   Type *PtrTy = LI->getPointerOperandType();
   Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
 
+  // If the segment load is going to be performed segment at a time anyways
+  // and there's only one element used, use a strided load instead.  This
+  // will be equally fast, and create less vector register pressure.
+  if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
+      1 == llvm::count_if(DeinterleaveValues,
+                          [](Value *V) { return V != nullptr; })) {
+    unsigned Idx = std::distance(DeinterleaveValues.begin(), FirstActiveItr);
+    unsigned ScalarSizeInBytes = DL.getTypeStoreSize(ResVTy->getElementType());
+    Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
+    Value *Offset = ConstantInt::get(XLenTy, Idx * ScalarSizeInBytes);
+    Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
+    Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
+    Type *I32 = Type::getIntNTy(LI->getContext(), 32);
+    Value *VL = Builder.CreateElementCount(I32, ResVTy->getElementCount());
+
+    CallInst *CI =
+        Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
+                                {ResVTy, BasePtr->getType(), Stride->getType()},
+                                {BasePtr, Stride, Mask, VL});
+    Align A = commonAlignment(LI->getAlign(), Idx * ScalarSizeInBytes);
+    CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), A));
+    (*FirstActiveItr)->replaceAllUsesWith(CI);
+    return true;
+  }
+
+  Value *Return;
   if (isa<FixedVectorType>(ResVTy)) {
     Value *VL = Builder.CreateElementCount(XLenTy, ResVTy->getElementCount());
     Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
index 9af92aa995f1f..e28428224c2ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -407,8 +407,9 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
 define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v8, (a0)
+; CHECK-NEXT:    li a1, 4
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i8>, ptr %p
   %d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
@@ -419,8 +420,10 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
 define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
 ; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT:    vlseg4e8.v v5, (a0)
+; CHECK-NEXT:    addi a0, a0, 3
+; CHECK-NEXT:    li a1, 4
+; CHECK-NEXT:    vsetvli a2, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vlse8.v v8, (a0), a1
 ; CHECK-NEXT:    ret
   %vec = load <vscale x 32 x i8>, ptr %p
   %d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)

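To make the arithmetic in the second test concrete: only field 3 of the factor-4 deinterleave survives, and the elements are i8, so Offset = Idx * ScalarSizeInBytes = 3 * 1 = 3 bytes (the addi a0, a0, 3) and Stride = Factor * ScalarSizeInBytes = 4 * 1 = 4 bytes (the li a1, 4), which together feed the vlse8.v strided load that replaces the vlseg4e8.v segment load.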
Collaborator commented on:

    // If the segment load is going to be performed segment at a time anyways

Suggested change:

    - // If the segment load is going to be performed segment at a time anyways
    + // If the segment load is going to be performed one segment at a time anyways
Collaborator commented on:

    // If the segment load is going to be performed segment at a time anyways
    // and there's only one element used, use a strided load instead.  This

element -> field?


Collaborator commented on:

    // will be equally fast, and create less vector register pressure.

Suggested change:

    - // will be equally fast, and create less vector register pressure.
    + // will be equally fast, and creates less vector register pressure.

Collaborator commented on:

    Type *I32 = Type::getIntNTy(LI->getContext(), 32);

Type::getInt32Ty?

lukel97 (Contributor) left a comment:

LGTM

Contributor commented on lines +262 to +263:

    1 == llvm::count_if(DeinterleaveValues,
                        [](Value *V) { return V != nullptr; })) {

Nit

Suggested change:

    - 1 == llvm::count_if(DeinterleaveValues,
    -                     [](Value *V) { return V != nullptr; })) {
    + llvm::count_if(DeinterleaveValues,
    +                [](Value *V) { return V != nullptr; }) == 1) {

preames (Collaborator, Author) commented Jul 16, 2025

This patch conflicts with #148716. As I was already planning on reworking this to be a combine eventually, I'm going to close this for the moment, and post something new once the API has stabilized a bit.

preames closed this Jul 16, 2025