From 145e8fded27748078e552f06fbcc4d214ad0ac71 Mon Sep 17 00:00:00 2001 From: LiqinWeng Date: Tue, 24 Sep 2024 11:20:07 +0800 Subject: [PATCH 1/3] [LV][EVL][Test] Prepare test for adding select Recipe --- .../RISCV/vplan-vp-select-intrinsics.ll | 101 ++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll new file mode 100644 index 0000000000000..da75d32bca9b2 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -0,0 +1,101 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s + +define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' { +; IF-EVL-NEXT: Live-in vp<%0> = VF * UF +; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count +; IF-EVL-NEXT: Live-in ir<%N> = original trip-count + +; IF-EVL: vector.ph: +; IF-EVL-NEXT: Successor(s): vector loop + +; IF-EVL: vector loop: { +; IF-EVL-NEXT: vector.body: +; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12> +; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11> +; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N> 
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1> +; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6> +; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx> +; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5> +; IF-EVL-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6> +; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx3> +; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5> +; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1> +; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5> +; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2> +; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5> +; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6> +; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15> +; IF-EVL-NEXT: WIDEN vp.store vp<%9>, ir<%cond>, vp<%5> +; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64 +; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4> +; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0> +; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1> +; IF-EVL-NEXT: No successors +; IF-EVL-NEXT: } + +; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { +; NO-VP-NEXT: Live-in vp<%0> = VF * UF +; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count +; NO-VP-NEXT: Live-in ir<%N> = original trip-count + +; NO-VP: vector.ph: +; NO-VP-NEXT: Successor(s): vector loop + +; NO-VP: vector loop: { +; NO-VP-NEXT: vector.body: +; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7> +; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> +; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3> +; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx> +; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4> +; NO-VP-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3> +; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx3> +; NO-VP-NEXT: WIDEN ir<%1> = load vp<%5> +; NO-VP-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1> +; 
NO-VP-NEXT: WIDEN ir<%2> = sub ir<0>, ir<%1> +; NO-VP-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2> +; NO-VP-NEXT: WIDEN ir<%cond> = add ir<%cond.p>, ir<%0> +; NO-VP-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%3> +; NO-VP-NEXT: vp<%6> = vector-pointer ir<%arrayidx15> +; NO-VP-NEXT: WIDEN store vp<%6>, ir<%cond> +; NO-VP-NEXT: EMIT vp<%7> = add nuw vp<%2>, vp<%0> +; NO-VP-NEXT: EMIT branch-on-count vp<%7>, vp<%1> +; NO-VP-NEXT: No successors +; NO-VP-NEXT: } + + +entry: + %cmp30 = icmp sgt i64 %N, 0 + br i1 %cmp30, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv + %1 = load i32, ptr %arrayidx3, align 4 + %cmp4 = icmp sgt i32 %0, %1 + %2 = sub i32 0, %1 + %cond.p = select i1 %cmp4, i32 %1, i32 %2 + %cond = add i32 %cond.p, %0 + %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv + store i32 %cond, ptr %arrayidx15, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} From 0f1be9728dd7a986e96d4143a850eaecb420b558 Mon Sep 17 00:00:00 2001 From: LiqinWeng Date: Thu, 19 Sep 2024 17:46:54 +0800 Subject: [PATCH 2/3] [VP][EVL] Support select instruction with EVL-vectorization --- llvm/lib/Transforms/Vectorize/VPlan.h | 68 ++++++++++++++++++- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 53 ++++++++++++++- .../Transforms/Vectorize/VPlanTransforms.cpp | 4 ++ llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + .../Transforms/Vectorize/VPlanVerifier.cpp | 4 ++ ...rize-force-tail-with-evl-cond-reduction.ll | 2 +- .../RISCV/vplan-vp-select-intrinsics.ll | 2 +- 7 files changed, 126 insertions(+), 8 deletions(-) diff --git 
a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index c4567362eaffc..4eb6fc931b398 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -889,6 +889,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { case VPRecipeBase::VPWidenSC: case VPRecipeBase::VPWidenEVLSC: case VPRecipeBase::VPWidenSelectSC: + case VPRecipeBase::VPWidenSelectEVLSC: case VPRecipeBase::VPBlendSC: case VPRecipeBase::VPPredInstPHISC: case VPRecipeBase::VPCanonicalIVPHISC: @@ -1712,10 +1713,17 @@ class VPHistogramRecipe : public VPRecipeBase { /// A recipe for widening select instructions. struct VPWidenSelectRecipe : public VPSingleDefRecipe { + +protected: + template + VPWidenSelectRecipe(unsigned VPDefOpcode, SelectInst &I, + iterator_range Operands) + : VPSingleDefRecipe(VPDefOpcode, Operands, &I, I.getDebugLoc()) {} + +public: template VPWidenSelectRecipe(SelectInst &I, iterator_range Operands) - : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I, - I.getDebugLoc()) {} + : VPWidenSelectRecipe(VPDef::VPWidenSelectSC, I, Operands) {} ~VPWidenSelectRecipe() override = default; @@ -1724,7 +1732,15 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe { operands()); } - VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC) + static inline bool classof(const VPRecipeBase *R) { + return R->getVPDefID() == VPRecipeBase::VPWidenSelectSC || + R->getVPDefID() == VPRecipeBase::VPWidenSelectEVLSC; + } + + static inline bool classof(const VPUser *U) { + auto *R = dyn_cast(U); + return R && classof(R); + } /// Produce a widened version of the select instruction. void execute(VPTransformState &State) override; @@ -1744,6 +1760,52 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe { } }; +// A recipe for widening select instruction with vector-predication intrinsics +// with explicit vector length (EVL). 
+struct VPWidenSelectEVLRecipe : public VPWidenSelectRecipe { + + template + VPWidenSelectEVLRecipe(SelectInst &I, iterator_range Operands, + VPValue &EVL) + : VPWidenSelectRecipe(VPDef::VPWidenSelectEVLSC, I, Operands) { + addOperand(&EVL); + } + + VPWidenSelectEVLRecipe(VPWidenSelectRecipe &W, VPValue &EVL) + : VPWidenSelectEVLRecipe(*cast(W.getUnderlyingInstr()), + W.operands(), EVL) {} + + ~VPWidenSelectEVLRecipe() override = default; + + VPWidenSelectEVLRecipe *clone() final { + llvm_unreachable("VPWidenSelectEVLRecipe cannot be cloned"); + return nullptr; + } + + VP_CLASSOF_IMPL(VPDef::VPWidenSelectEVLSC) + + VPValue *getEVL() { return getOperand(getNumOperands() - 1); } + const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); } + + /// Produce a vp-intrinsic version of the select instruction. + void execute(VPTransformState &State) final; + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + // EVL in that recipe is always the last operand, thus any use before means + // the VPValue should be vectorized. + return getEVL() == Op; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const final; +#endif +}; + /// A recipe for handling GEP instructions. 
class VPWidenGEPRecipe : public VPRecipeWithIRFlags { bool isPointerLoopInvariant() const { diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7590863853295..1923897c43fd0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -95,7 +95,8 @@ bool VPRecipeBase::mayWriteToMemory() const { case VPWidenPHISC: case VPWidenSC: case VPWidenEVLSC: - case VPWidenSelectSC: { + case VPWidenSelectSC: + case VPWidenSelectEVLSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); (void)I; @@ -136,7 +137,8 @@ bool VPRecipeBase::mayReadFromMemory() const { case VPWidenPHISC: case VPWidenSC: case VPWidenEVLSC: - case VPWidenSelectSC: { + case VPWidenSelectSC: + case VPWidenSelectEVLSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); (void)I; @@ -173,7 +175,8 @@ bool VPRecipeBase::mayHaveSideEffects() const { case VPWidenPointerInductionSC: case VPWidenSC: case VPWidenEVLSC: - case VPWidenSelectSC: { + case VPWidenSelectSC: + case VPWidenSelectEVLSC: { const Instruction *I = dyn_cast_or_null(getVPSingleValue()->getUnderlyingValue()); (void)I; @@ -1127,6 +1130,21 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, getOperand(2)->printAsOperand(O, SlotTracker); O << (isInvariantCond() ? " (condition is loop invariant)" : ""); } + +void VPWidenSelectEVLRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "WIDEN-SELECT "; + printAsOperand(O, SlotTracker); + O << " = vp.select "; + getOperand(0)->printAsOperand(O, SlotTracker); + O << ", "; + getOperand(1)->printAsOperand(O, SlotTracker); + O << ", "; + getOperand(2)->printAsOperand(O, SlotTracker); + O << ", "; + getOperand(3)->printAsOperand(O, SlotTracker); + O << (isInvariantCond() ? 
" (condition is loop invariant)" : ""); +} #endif void VPWidenSelectRecipe::execute(VPTransformState &State) { @@ -1147,6 +1165,35 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) { State.addMetadata(Sel, dyn_cast_or_null(getUnderlyingValue())); } +void VPWidenSelectEVLRecipe::execute(VPTransformState &State) { + State.setDebugLocFrom(getDebugLoc()); + assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " + "explicit vector length."); + + Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true); + IRBuilderBase &BuilderIR = State.Builder; + VectorBuilder Builder(BuilderIR); + Builder.setEVL(EVLArg); + // The condition can be loop invariant but still defined inside the + // loop. This means that we can't just use the original 'cond' value. + // We have to take the 'vectorized' value and pick the first lane. + // Instcombine will make this a no-op. + auto *InvarCond = + isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; + + Value *Cond = InvarCond ? 
InvarCond : State.get(getCond(), 0); + if (!isa(Cond->getType())) + Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond"); + + Value *Op0 = State.get(getOperand(1), 0); + Value *Op1 = State.get(getOperand(2), 0); + Value *VPInst = Builder.createVectorInstruction( + Instruction::Select, Op0->getType(), {Cond, Op0, Op1}, "vp.select"); + State.set(this, VPInst, 0); + State.addMetadata(VPInst, + dyn_cast_or_null(getUnderlyingValue())); +} + VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy( const FastMathFlags &FMF) { AllowReassoc = FMF.allowReassoc(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index a878613c4ba48..94e325764a273 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1379,6 +1379,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { return nullptr; return new VPWidenEVLRecipe(*W, EVL); }) + .Case( + [&](VPWidenSelectRecipe *W) -> VPRecipeBase * { + return new VPWidenSelectEVLRecipe(*W, EVL); + }) .Case([&](VPReductionRecipe *Red) { VPValue *NewMask = GetNewMask(Red->getCondOp()); return new VPReductionEVLRecipe(*Red, EVL, NewMask); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 4c383244f96f1..3b11b7ef3ef50 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -357,6 +357,7 @@ class VPDef { VPWidenSC, VPWidenEVLSC, VPWidenSelectSC, + VPWidenSelectEVLSC, VPBlendSC, VPHistogramSC, // START: Phi-like recipes. Need to be kept together. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 99bc4c38a3c3c..086d5d6ba2445 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -148,6 +148,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const { return VerifyEVLUse( *W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2); }) + .Case( + [&](const VPWidenSelectEVLRecipe *S) { + return VerifyEVLUse(*S, 3); + }) .Case([&](const VPReductionEVLRecipe *R) { return VerifyEVLUse(*R, 2); }) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll index 41796e848632e..fc12dd54f88df 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll @@ -70,7 +70,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) { ; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 ; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt [[VP_OP_LOAD]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) -; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select [[TMP19]], [[VP_OP_LOAD]], zeroinitializer +; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call @llvm.vp.select.nxv4i32( [[TMP19]], [[VP_OP_LOAD]], zeroinitializer, i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) ; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]] ; 
IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll index da75d32bca9b2..62e0d3e58092d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -33,7 +33,7 @@ define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5> ; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1> ; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5> -; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2> +; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = vp.select ir<%cmp4>, ir<%1>, ir<%2>, vp<%5> ; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5> ; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6> ; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15> From 0053b2a35485534c2b771698c0465d59192da9a7 Mon Sep 17 00:00:00 2001 From: LiqinWeng Date: Wed, 25 Sep 2024 18:24:00 +0800 Subject: [PATCH 3/3] [LV][EVL] Address the comments, rebase and update testcase --- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 10 +- .../RISCV/vplan-vp-select-intrinsics.ll | 94 ++++++------------- 2 files changed, 32 insertions(+), 72 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 1923897c43fd0..5b7b4a4dea1da 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1167,10 +1167,7 @@ void VPWidenSelectEVLRecipe::execute(VPTransformState &State) { void VPWidenSelectEVLRecipe::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); - assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " - "explicit vector length."); - - Value *EVLArg = 
State.get(getEVL(), 0, /*NeedsScalar=*/true); + Value *EVLArg = State.get(getEVL(), /*NeedsScalar=*/true); IRBuilderBase &BuilderIR = State.Builder; VectorBuilder Builder(BuilderIR); Builder.setEVL(EVLArg); @@ -1179,11 +1176,10 @@ void VPWidenSelectEVLRecipe::execute(VPTransformState &State) { // We have to take the 'vectorized' value and pick the first lane. // Instcombine will make this a no-op. auto *InvarCond = - isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; + isInvariantCond() ? State.get(getCond(), VPLane(0)) : nullptr; Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0); - if (!isa(Cond->getType())) - Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond"); + assert(isa(Cond->getType()) && "CondType must be vector Type."); Value *Op0 = State.get(getOperand(1), 0); Value *Op1 = State.get(getOperand(2), 0); diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll index 62e0d3e58092d..fffe03f5c8528 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll @@ -5,15 +5,10 @@ ; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ ; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s -; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ -; RUN: -force-tail-folding-style=none \ -; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ -; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s - define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' { -; IF-EVL-NEXT: Live-in vp<%0> = VF * UF -; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count +; 
IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count ; IF-EVL-NEXT: Live-in ir<%N> = original trip-count ; IF-EVL: vector.ph: @@ -21,67 +16,33 @@ define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { ; IF-EVL: vector loop: { ; IF-EVL-NEXT: vector.body: -; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12> -; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11> -; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N> -; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1> -; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6> -; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx> -; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5> -; IF-EVL-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6> -; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx3> -; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5> -; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1> -; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5> -; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = vp.select ir<%cmp4>, ir<%1>, ir<%2>, vp<%5> -; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5> -; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6> -; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15> -; IF-EVL-NEXT: WIDEN vp.store vp<%9>, ir<%cond>, vp<%5> -; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64 -; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4> -; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0> -; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1> +; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]> +; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]> +; IF-EVL-NEXT: 
vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]> +; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = vp.sub ir<0>, ir<[[LD2]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN-SELECT ir<[[SELECT:%.+]]> = vp.select ir<[[CMP]]>, ir<[[LD2]]>, ir<[[SUB]]>, vp<[[EVL]]> +; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add ir<[[SELECT]]>, ir<[[LD1]]>, vp<[[EVL]]> +; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]> +; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]> +; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 +; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> ; IF-EVL-NEXT: No successors ; IF-EVL-NEXT: } -; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { -; NO-VP-NEXT: Live-in vp<%0> = VF * UF -; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count -; NO-VP-NEXT: Live-in ir<%N> = original trip-count - -; NO-VP: vector.ph: -; NO-VP-NEXT: Successor(s): vector loop - -; NO-VP: vector loop: { -; NO-VP-NEXT: vector.body: -; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7> -; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> -; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3> -; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx> -; 
NO-VP-NEXT: WIDEN ir<%0> = load vp<%4> -; NO-VP-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3> -; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx3> -; NO-VP-NEXT: WIDEN ir<%1> = load vp<%5> -; NO-VP-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1> -; NO-VP-NEXT: WIDEN ir<%2> = sub ir<0>, ir<%1> -; NO-VP-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2> -; NO-VP-NEXT: WIDEN ir<%cond> = add ir<%cond.p>, ir<%0> -; NO-VP-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%3> -; NO-VP-NEXT: vp<%6> = vector-pointer ir<%arrayidx15> -; NO-VP-NEXT: WIDEN store vp<%6>, ir<%cond> -; NO-VP-NEXT: EMIT vp<%7> = add nuw vp<%2>, vp<%0> -; NO-VP-NEXT: EMIT branch-on-count vp<%7>, vp<%1> -; NO-VP-NEXT: No successors -; NO-VP-NEXT: } - - entry: - %cmp30 = icmp sgt i64 %N, 0 - br i1 %cmp30, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: - ret void + br label %for.body for.body: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] @@ -97,5 +58,8 @@ for.body: store i32 %cond, ptr %arrayidx15, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %exitcond.not = icmp eq i64 %indvars.iv.next, %N - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void }