Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 29 additions & 5 deletions llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,20 +243,44 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
assert(LI->isSimple());
IRBuilder<> Builder(LI);

Value *FirstActive =
*llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
auto FirstActiveItr =
llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
VectorType *ResVTy = cast<VectorType>((*FirstActiveItr)->getType());

const DataLayout &DL = LI->getDataLayout();

if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
LI->getPointerAddressSpace(), DL))
return false;

Value *Return;
Type *PtrTy = LI->getPointerOperandType();
Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());

// If the segment load is going to be performed one segment at a time anyways
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// If the segment load is going to be performed segment at a time anyways
// If the segment load is going to be performed one segment at a time anyways

// and there's only one field used, use a strided load instead. This
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

element -> field?

// will be equally fast, and creates less vector register pressure.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// will be equally fast, and create less vector register pressure.
// will be equally fast, and creates less vector register pressure.

if (!Subtarget.hasOptimizedSegmentLoadStore(Factor) &&
1 == llvm::count_if(DeinterleaveValues,
[](Value *V) { return V != nullptr; })) {
Comment on lines +262 to +263
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit

Suggested change
1 == llvm::count_if(DeinterleaveValues,
[](Value *V) { return V != nullptr; })) {
llvm::count_if(DeinterleaveValues,
[](Value *V) { return V != nullptr; }) == 1) {

unsigned Idx = std::distance(DeinterleaveValues.begin(), FirstActiveItr);
unsigned ScalarSizeInBytes = DL.getTypeStoreSize(ResVTy->getElementType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Idx * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
Type *I32 = Type::getIntNTy(LI->getContext(), 32);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type::getInt32Ty?

Value *VL = Builder.CreateElementCount(I32, ResVTy->getElementCount());

CallInst *CI =
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
{ResVTy, BasePtr->getType(), Stride->getType()},
{BasePtr, Stride, Mask, VL});
Align A = commonAlignment(LI->getAlign(), Idx * ScalarSizeInBytes);
CI->addParamAttr(0, Attribute::getWithAlignment(CI->getContext(), A));
(*FirstActiveItr)->replaceAllUsesWith(CI);
return true;
}

Value *Return;
if (isa<FixedVectorType>(ResVTy)) {
Value *VL = Builder.CreateElementCount(XLenTy, ResVTy->getElementCount());
Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount());
Expand Down
11 changes: 7 additions & 4 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -407,8 +407,9 @@ define { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
Expand All @@ -419,8 +420,10 @@ define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive(ptr %p) {
define <vscale x 8 x i8> @vector_deinterleave_load_factor4_oneactive2(ptr %p) {
; CHECK-LABEL: vector_deinterleave_load_factor4_oneactive2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8.v v5, (a0)
; CHECK-NEXT: addi a0, a0, 3
; CHECK-NEXT: li a1, 4
; CHECK-NEXT: vsetvli a2, zero, e8, m1, ta, ma
; CHECK-NEXT: vlse8.v v8, (a0), a1
; CHECK-NEXT: ret
%vec = load <vscale x 32 x i8>, ptr %p
%d0 = call { <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i8> } @llvm.vector.deinterleave4(<vscale x 32 x i8> %vec)
Expand Down
Loading