diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index af1e0d7251a4f..9a2773c06bae6 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -24,6 +24,7 @@ namespace llvm { class TargetLibraryInfo; +class IntrinsicInst; /// The Vector Function Database. /// @@ -188,6 +189,10 @@ LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID); /// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics. LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID); +/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each +/// factor. +LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI); + /// Given a vector and an element number, see if the scalar value is /// already around as a register, for example if it were inserted then extracted /// from the vector. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 72594c7f9783c..238d07a20eec8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3251,10 +3251,9 @@ class LLVM_ABI TargetLoweringBase { /// /// \p Load is the accompanying load instruction. Can be either a plain load /// instruction or a vp.load intrinsic. - /// \p DeinterleaveValues contains the deinterleaved values. - virtual bool - lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, - ArrayRef DeinterleaveValues) const { + /// \p DI represents the deinterleaveN intrinsic. + virtual bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, + IntrinsicInst *DI) const { return false; } diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 7f0ed0b60a785..1b3da590cff7f 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -306,6 +306,15 @@ unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) { } } +VectorType *llvm::getDeinterleavedVectorType(IntrinsicInst *DI) { + [[maybe_unused]] unsigned Factor = + getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); + ArrayRef DISubtypes = DI->getType()->subtypes(); + assert(Factor && Factor == DISubtypes.size() && + "unexpected deinterleave factor or result type"); + return cast(DISubtypes[0]); +} + /// Given a vector and an element number, see if the scalar value is /// already around as a register, for example if it were inserted then extracted /// from the vector. diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp index 95599837e1bfc..68a956921c8e0 100644 --- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -618,29 +618,13 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); assert(Factor && "unexpected deinterleave intrinsic"); - SmallVector DeinterleaveValues(Factor, nullptr); - Value *LastFactor = nullptr; - for (auto *User : DI->users()) { - auto *Extract = dyn_cast(User); - if (!Extract || Extract->getNumIndices() != 1) - return false; - unsigned Idx = Extract->getIndices()[0]; - if (DeinterleaveValues[Idx]) - return false; - DeinterleaveValues[Idx] = Extract; - LastFactor = Extract; - } - - if (!LastFactor) - return false; - Value *Mask = nullptr; if (auto *VPLoad = dyn_cast(LoadedVal)) { if (VPLoad->getIntrinsicID() != Intrinsic::vp_load) return false; // Check mask operand. Handle both all-true/false and interleaved mask. Value *WideMask = VPLoad->getOperand(1); - Mask = getMask(WideMask, Factor, cast(LastFactor->getType())); + Mask = getMask(WideMask, Factor, getDeinterleavedVectorType(DI)); if (!Mask) return false; @@ -657,12 +641,9 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( // Try and match this with target specific intrinsics. if (!TLI->lowerDeinterleaveIntrinsicToLoad(cast(LoadedVal), Mask, - DeinterleaveValues)) + DI)) return false; - for (Value *V : DeinterleaveValues) - if (V) - DeadInsts.insert(cast(V)); DeadInsts.insert(DI); // We now have a target-specific load, so delete the old one. DeadInsts.insert(cast(LoadedVal)); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4f13a14d24649..46c53843ba3a4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17486,9 +17486,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, } bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( - Instruction *Load, Value *Mask, - ArrayRef DeinterleavedValues) const { - unsigned Factor = DeinterleavedValues.size(); + Instruction *Load, Value *Mask, IntrinsicInst *DI) const { + const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); if (Factor != 2 && Factor != 4) { LLVM_DEBUG(dbgs() << "Matching ld2 and ld4 patterns failed\n"); return false; @@ -17498,9 +17497,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( return false; assert(!Mask && "Unexpected mask on a load\n"); - Value *FirstActive = *llvm::find_if(DeinterleavedValues, - [](Value *V) { return V != nullptr; }); - VectorType *VTy = cast(FirstActive->getType()); + VectorType *VTy = getDeinterleavedVectorType(DI); const DataLayout &DL = LI->getModule()->getDataLayout(); bool UseScalable; @@ -17528,6 +17525,7 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue()); Value *BaseAddr = LI->getPointerOperand(); + Value *Result = nullptr; if (NumLoads > 1) { // Create multiple legal small ldN. SmallVector ExtractedLdValues(Factor, PoisonValue::get(VTy)); @@ -17548,25 +17546,20 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad( } LLVM_DEBUG(dbgs() << "LdN4 res: "; LdN->dump()); } - // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4 - for (unsigned J = 0; J < Factor; ++J) { - if (DeinterleavedValues[J]) - DeinterleavedValues[J]->replaceAllUsesWith(ExtractedLdValues[J]); - } + + // Merge the values from different factors. + Result = PoisonValue::get(DI->getType()); + for (unsigned J = 0; J < Factor; ++J) + Result = Builder.CreateInsertValue(Result, ExtractedLdValues[J], J); } else { - Value *Result; if (UseScalable) Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN"); else Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN"); - // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4 - for (unsigned I = 0; I < Factor; I++) { - if (DeinterleavedValues[I]) { - Value *NewExtract = Builder.CreateExtractValue(Result, I); - DeinterleavedValues[I]->replaceAllUsesWith(NewExtract); - } - } } + + // Replace output of deinterleave2 intrinsic by output of ldN2/ldN4 + DI->replaceAllUsesWith(Result); return true; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 6afb3c330d25b..a19bf19387a8c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -218,9 +218,8 @@ class AArch64TargetLowering : public TargetLowering { bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override; - bool lowerDeinterleaveIntrinsicToLoad( - Instruction *Load, Value *Mask, - ArrayRef DeinterleaveValues) const override; + bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, + IntrinsicInst *DI) const override; bool lowerInterleaveIntrinsicToStore( StoreInst *SI, ArrayRef InterleaveValues) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 3af729aaba2ae..e8adf561c9c35 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -437,9 +437,8 @@ class RISCVTargetLowering : public TargetLowering { bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override; - bool lowerDeinterleaveIntrinsicToLoad( - Instruction *Load, Value *Mask, - ArrayRef DeinterleaveValues) const override; + bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, + IntrinsicInst *DI) const override; bool lowerInterleaveIntrinsicToStore( StoreInst *SI, ArrayRef InterleaveValues) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index ddfacd970e950..025054d5a2a60 100644 --- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -14,6 +14,7 @@ #include "RISCVISelLowering.h" #include "RISCVSubtarget.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -256,17 +257,14 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { } bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( - Instruction *Load, Value *Mask, - ArrayRef DeinterleaveValues) const { - const unsigned Factor = DeinterleaveValues.size(); + Instruction *Load, Value *Mask, IntrinsicInst *DI) const { + const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); if (Factor > 8) return false; IRBuilder<> Builder(Load); - Value *FirstActive = - *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; }); - VectorType *ResVTy = cast(FirstActive->getType()); + VectorType *ResVTy = getDeinterleavedVectorType(DI); const DataLayout &DL = Load->getDataLayout(); auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); @@ -346,16 +344,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( } } - for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) { - if (!DIV) - continue; - // We have to create a brand new ExtractValue to replace each - // of these old ExtractValue instructions. - Value *NewEV = - Builder.CreateExtractValue(Return, {static_cast(Idx)}); - DIV->replaceAllUsesWith(NewEV); - } - + DI->replaceAllUsesWith(Return); return true; } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index 3e822d357b667..807651c9b40c6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -274,6 +274,59 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3(ptr %p ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2 } +define { <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_partial(ptr %p) { +; CHECK-LABEL: vector_deinterleave_load_factor3_partial: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlseg3e8.v v7, (a0) +; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: ret + %vec = load <24 x i8>, ptr %p + %d0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec) + %t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0 + %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2 + %res0 = insertvalue { <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0 + %res1 = insertvalue { <8 x i8>, <8 x i8> } %res0, <8 x i8> %t2, 1 + ret { <8 x i8>, <8 x i8> } %res1 +} + +; InterleavedAccess should kick in even if the users of deinterleave intrinsic are not extractvalue. +define { <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor3_no_extract(ptr %p, ptr %p1, i1 %c) { +; CHECK-LABEL: vector_deinterleave_load_factor3_no_extract: +; CHECK: # %bb.0: +; CHECK-NEXT: andi a2, a2, 1 +; CHECK-NEXT: beqz a2, .LBB17_2 +; CHECK-NEXT: # %bb.1: # %bb0 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlseg3e8.v v6, (a0) +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB17_2: # %bb1 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vlseg3e8.v v6, (a1) +; CHECK-NEXT: ret + br i1 %c, label %bb0, label %bb1 + +bb0: + %vec0 = load <24 x i8>, ptr %p + %d0.0 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec0) + br label %merge + +bb1: + %vec1 = load <24 x i8>, ptr %p1 + %d0.1 = call {<8 x i8>, <8 x i8>, <8 x i8>} @llvm.vector.deinterleave3(<24 x i8> %vec1) + br label %merge + +merge: + %d0 = phi {<8 x i8>, <8 x i8>, <8 x i8>} [%d0.0, %bb0], [%d0.1, %bb1] + %t0 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 0 + %t1 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 1 + %t2 = extractvalue {<8 x i8>, <8 x i8>, <8 x i8>} %d0, 2 + %res0 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } poison, <8 x i8> %t0, 0 + %res1 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res0, <8 x i8> %t1, 0 + %res2 = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } %res1, <8 x i8> %t2, 0 + ret { <8 x i8>, <8 x i8>, <8 x i8> } %res2 +} + define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @vector_deinterleave_load_factor4(ptr %p) { ; CHECK-LABEL: vector_deinterleave_load_factor4: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll index 7fb822d20f892..27ecbe56bda42 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll @@ -66,6 +66,115 @@ define {, , } @load_factor ret { , , } %res1 } +define {, } @load_factor3_partial(ptr %ptr, i32 %evl) { +; RV32-LABEL: load_factor3_partial: +; RV32: # %bb.0: +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: lui a2, 699051 +; RV32-NEXT: addi a2, a2, -1365 +; RV32-NEXT: mulhu a1, a1, a2 +; RV32-NEXT: srli a1, a1, 1 +; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; RV32-NEXT: vlseg3e32.v v7, (a0) +; RV32-NEXT: vmv1r.v v8, v7 +; RV32-NEXT: ret +; +; RV64-LABEL: load_factor3_partial: +; RV64: # %bb.0: +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: lui a2, 699051 +; RV64-NEXT: addi a2, a2, -1365 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: slli a2, a2, 32 +; RV64-NEXT: mulhu a1, a1, a2 +; RV64-NEXT: srli a1, a1, 33 +; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; RV64-NEXT: vlseg3e32.v v7, (a0) +; RV64-NEXT: vmv1r.v v8, v7 +; RV64-NEXT: ret + %rvl = mul i32 %evl, 3 + %wide.masked.load = call @llvm.vp.load(ptr %ptr, splat (i1 true), i32 %rvl) + %deinterleaved.results = call { , , } @llvm.vector.deinterleave3( %wide.masked.load) + %t0 = extractvalue { , , } %deinterleaved.results, 0 + %t2 = extractvalue { , , } %deinterleaved.results, 2 + %res0 = insertvalue { , } poison, %t0, 0 + %res1 = insertvalue { , } %res0, %t2, 1 + ret { , } %res1 +} + +; InterleavedAccess should kick in even if the users of deinterleave intrinsic are not extractvalue. +define {, } @load_factor3_no_extract(ptr %ptr, i32 %evl) { +; RV32-LABEL: load_factor3_no_extract: +; RV32: # %bb.0: +; RV32-NEXT: li a2, 12 +; RV32-NEXT: beq a1, a2, .LBB3_2 +; RV32-NEXT: # %bb.1: # %bb0 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: lui a2, 699051 +; RV32-NEXT: addi a2, a2, -1365 +; RV32-NEXT: mulhu a1, a1, a2 +; RV32-NEXT: srli a1, a1, 1 +; RV32-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; RV32-NEXT: vlseg3e32.v v7, (a0) +; RV32-NEXT: j .LBB3_3 +; RV32-NEXT: .LBB3_2: # %bb1 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vlseg3e32.v v7, (a0) +; RV32-NEXT: .LBB3_3: # %merge +; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV32-NEXT: vmv1r.v v8, v7 +; RV32-NEXT: ret +; +; RV64-LABEL: load_factor3_no_extract: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a2, a1 +; RV64-NEXT: li a3, 12 +; RV64-NEXT: beq a2, a3, .LBB3_2 +; RV64-NEXT: # %bb.1: # %bb0 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: lui a2, 699051 +; RV64-NEXT: addi a2, a2, -1365 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: slli a2, a2, 32 +; RV64-NEXT: mulhu a1, a1, a2 +; RV64-NEXT: srli a1, a1, 33 +; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; RV64-NEXT: vlseg3e32.v v7, (a0) +; RV64-NEXT: j .LBB3_3 +; RV64-NEXT: .LBB3_2: # %bb1 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vlseg3e32.v v7, (a0) +; RV64-NEXT: .LBB3_3: # %merge +; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64-NEXT: vmv1r.v v8, v7 +; RV64-NEXT: ret + %p = icmp ne i32 %evl, 12 + br i1 %p, label %bb0, label %bb1 + +bb0: + %rvl.0 = mul i32 %evl, 3 + %wide.load.0 = call @llvm.vp.load(ptr %ptr, splat (i1 true), i32 %rvl.0) + %deinterleaved.results.0 = call { , , } @llvm.vector.deinterleave3( %wide.load.0) + br label %merge + +bb1: + %wide.load.1 = call @llvm.vp.load(ptr %ptr, splat (i1 true), i32 12) + %deinterleaved.results.1 = call { , , } @llvm.vector.deinterleave3( %wide.load.1) + br label %merge + +merge: + %deinterleaved.results = phi { , , } [%deinterleaved.results.0, %bb0], [%deinterleaved.results.1, %bb1] + %t0 = extractvalue { , , } %deinterleaved.results, 0 + %t2 = extractvalue { , , } %deinterleaved.results, 2 + %res0 = insertvalue { , } poison, %t0, 0 + %res1 = insertvalue { , } %res0, %t2, 1 + ret { , } %res1 +} + define {, , , } @load_factor4_v2(ptr %ptr, i32 %evl) { ; RV32-LABEL: load_factor4_v2: ; RV32: # %bb.0: diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll index 09e2c53465cd7..6c81d9a4d2ed6 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/fixed-deinterleave-intrinsics.ll @@ -10,8 +10,8 @@ define void @deinterleave_i8_factor2(ptr %ptr) { ; NEON-LABEL: define void @deinterleave_i8_factor2 ; NEON-SAME: (ptr [[PTR:%.*]]) { ; NEON-NEXT: [[LDN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[PTR]]) -; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 0 -; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[LDN]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_i8_factor2 @@ -33,8 +33,8 @@ define void @deinterleave_i16_factor2(ptr %ptr) { ; NEON-LABEL: define void @deinterleave_i16_factor2 ; NEON-SAME: (ptr [[PTR:%.*]]) { ; NEON-NEXT: [[LDN:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[PTR]]) -; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 0 -; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_i16_factor2 @@ -56,8 +56,8 @@ define void @deinterleave_8xi32_factor2(ptr %ptr) { ; NEON-LABEL: define void @deinterleave_8xi32_factor2 ; NEON-SAME: (ptr [[PTR:%.*]]) { ; NEON-NEXT: [[LDN:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[PTR]]) -; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0 -; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[LDN]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_8xi32_factor2 @@ -79,8 +79,8 @@ define void @deinterleave_i64_factor2(ptr %ptr) { ; NEON-LABEL: define void @deinterleave_i64_factor2 ; NEON-SAME: (ptr [[PTR:%.*]]) { ; NEON-NEXT: [[LDN:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[PTR]]) -; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0 -; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[LDN]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_i64_factor2 @@ -102,8 +102,8 @@ define void @deinterleave_float_factor2(ptr %ptr) { ; NEON-LABEL: define void @deinterleave_float_factor2 ; NEON-SAME: (ptr [[PTR:%.*]]) { ; NEON-NEXT: [[LDN:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr [[PTR]]) -; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 -; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <4 x float>, <4 x float> } [[LDN]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_float_factor2 @@ -125,8 +125,8 @@ define void @deinterleave_double_factor2(ptr %ptr) { ; NEON-LABEL: define void @deinterleave_double_factor2 ; NEON-SAME: (ptr [[PTR:%.*]]) { ; NEON-NEXT: [[LDN:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0(ptr [[PTR]]) -; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 0 -; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <2 x double>, <2 x double> } [[LDN]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_double_factor2 @@ -148,8 +148,8 @@ define void @deinterleave_ptr_factor2(ptr %ptr) { ; NEON-LABEL: define void @deinterleave_ptr_factor2 ; NEON-SAME: (ptr [[PTR:%.*]]) { ; NEON-NEXT: [[LDN:%.*]] = call { <2 x ptr>, <2 x ptr> } @llvm.aarch64.neon.ld2.v2p0.p0(ptr [[PTR]]) -; NEON-NEXT: [[TMP1:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 0 -; NEON-NEXT: [[TMP2:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <2 x ptr>, <2 x ptr> } [[LDN]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_ptr_factor2 @@ -301,6 +301,10 @@ define void @deinterleave_wide_i16_factor2(ptr %ptr) #0 { ; NEON-NEXT: [[TMP8:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP3]], <8 x i16> [[TMP7]], i64 8) ; NEON-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[LDN1]], 1 ; NEON-NEXT: [[TMP10:%.*]] = call <16 x i16> @llvm.vector.insert.v16i16.v8i16(<16 x i16> [[TMP5]], <8 x i16> [[TMP9]], i64 8) +; NEON-NEXT: [[TMP11:%.*]] = insertvalue { <16 x i16>, <16 x i16> } poison, <16 x i16> [[TMP8]], 0 +; NEON-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i16>, <16 x i16> } [[TMP11]], <16 x i16> [[TMP10]], 1 +; NEON-NEXT: [[EXTRACT1:%.*]] = extractvalue { <16 x i16>, <16 x i16> } [[TMP12]], 0 +; NEON-NEXT: [[EXTRACT2:%.*]] = extractvalue { <16 x i16>, <16 x i16> } [[TMP12]], 1 ; NEON-NEXT: ret void ; ; SVE-FIXED-LABEL: define void @deinterleave_wide_i16_factor2 diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll index 436389ba5b991..d7649801ea2fc 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/scalable-deinterleave-intrinsics.ll @@ -8,8 +8,8 @@ define void @deinterleave_nxi8_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxi8_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv16i8( splat (i1 true), ptr [[PTR]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[LDN]], 0 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 1 @@ -23,8 +23,8 @@ define void @deinterleave_nxi16_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxi16_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv8i16( splat (i1 true), ptr [[PTR]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[LDN]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 2 @@ -38,8 +38,8 @@ define void @deinterleave_nx8xi32_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nx8xi32_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( splat (i1 true), ptr [[PTR]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[LDN]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 4 @@ -53,8 +53,8 @@ define void @deinterleave_nxi64_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxi64_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv2i64( splat (i1 true), ptr [[PTR]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[LDN]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 8 @@ -68,8 +68,8 @@ define void @deinterleave_nxfloat_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxfloat_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv4f32( splat (i1 true), ptr [[PTR]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[LDN]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 4 @@ -83,8 +83,8 @@ define void @deinterleave_nxdouble_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxdouble_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( splat (i1 true), ptr [[PTR]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[LDN]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 8 @@ -98,8 +98,8 @@ define void @deinterleave_nxptr_factor2(ptr %ptr) #0 { ; CHECK-LABEL: define void @deinterleave_nxptr_factor2 ; CHECK-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[LDN:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv2p0( splat (i1 true), ptr [[PTR]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[LDN]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[LDN]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[LDN]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[LDN]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 8 @@ -215,6 +215,10 @@ define void @deinterleave_wide_nxi32_factor2(ptr %ptr) #0 { ; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP13]], [[TMP17]], i64 12) ; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { , } [[LDN3]], 1 ; CHECK-NEXT: [[TMP20:%.*]] = call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP15]], [[TMP19]], i64 12) +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { , } poison, [[TMP18]], 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { , } [[TMP21]], [[TMP20]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[TMP22]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[TMP22]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 4 @@ -239,6 +243,10 @@ define void @deinterleave_wide_nxdouble_factor2(ptr %ptr) #0 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP7]], i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , } [[LDN1]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP9]], i64 2) +; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { , } poison, [[TMP8]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { , } [[TMP11]], [[TMP10]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[TMP12]], 1 ; CHECK-NEXT: ret void ; %load = load , ptr %ptr, align 8 diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll index c565066541d1d..58c0bccc3be38 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-deinterleave4.ll @@ -49,8 +49,16 @@ define void @wide_deinterleave4(ptr %src) { ; CHECK-NEXT: [[TMP16:%.*]] = call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP7]], [[TMP15]], i64 4) ; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { , , , } [[LDN1]], 3 ; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP9]], [[TMP17]], i64 4) -; CHECK-NEXT: [[SUM:%.*]] = add [[TMP12]], [[TMP14]] -; CHECK-NEXT: [[SUB:%.*]] = sub [[TMP16]], [[TMP18]] +; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { , , , } poison, [[TMP12]], 0 +; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { , , , } [[TMP19]], [[TMP14]], 1 +; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { , , , } [[TMP20]], [[TMP16]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = insertvalue { , , , } [[TMP21]], [[TMP18]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { , , , } [[TMP22]], 0 +; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { , , , } [[TMP22]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { , , , } [[TMP22]], 2 +; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { , , , } [[TMP22]], 3 +; CHECK-NEXT: [[SUM:%.*]] = add [[TMP23]], [[TMP24]] +; CHECK-NEXT: [[SUB:%.*]] = sub [[TMP25]], [[TMP26]] ; CHECK-NEXT: ret void ; %load = load , ptr %src, align 4 @@ -73,8 +81,8 @@ define void @mix_deinterleave4_deinterleave2(ptr %src) { ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[LDN]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[LDN]], 3 ; CHECK-NEXT: [[LDN1:%.*]] = call { , } @llvm.aarch64.sve.ld2.sret.nxv4i32( splat (i1 true), ptr [[SRC]]) -; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , } [[LDN1]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , } [[LDN1]], 1 +; CHECK-NEXT: [[LD2_1:%.*]] = extractvalue { , } [[LDN1]], 0 +; CHECK-NEXT: [[LD2_2:%.*]] = extractvalue { , } [[LDN1]], 1 ; CHECK-NEXT: ret void ; @@ -95,12 +103,11 @@ define void @mix_deinterleave4_deinterleave2(ptr %src) { define void @negative_deinterleave4_test(ptr %src) { ; CHECK-LABEL: define void @negative_deinterleave4_test ; CHECK-SAME: (ptr [[SRC:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[LOAD:%.*]] = load , ptr [[SRC]], align 4 -; CHECK-NEXT: [[DEINTERLEAVE:%.*]] = tail call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[LOAD]]) -; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 1 -; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[DEINTERLEAVE]], 2 +; CHECK-NEXT: [[LDN:%.*]] = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( splat (i1 true), ptr [[SRC]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[LDN]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[LDN]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[LDN]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[LDN]], 2 ; CHECK-NEXT: ret void ; %load = load , ptr %src, align 4 diff --git a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll index b109448bd5d7c..1418ca09c0d61 100644 --- a/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/InterleavedAccess/AArch64/sve-interleaved-accesses.ll @@ -606,6 +606,10 @@ define void @deinterleave_nxptr_factor2(ptr %ptr) #2 { ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP7]], i64 2) ; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { , } [[LDN2]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP5]], [[TMP9]], i64 2) +; CHECK-NEXT: [[TMP11:%.*]] = insertvalue { , } poison, [[TMP8]], 0 +; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { , } [[TMP11]], [[TMP10]], 1 +; CHECK-NEXT: [[EXTRACT1:%.*]] = extractvalue { , } [[TMP12]], 0 +; CHECK-NEXT: [[EXTRACT2:%.*]] = extractvalue { , } [[TMP12]], 1 ; CHECK-NEXT: ret void ; %wide.vec = load , ptr %ptr, align 8