-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[VP][EVL] Support select instruction with EVL-vectorization #109614
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-transforms Author: LiqinWeng (LiqinWeng) ChangesFull diff: https://github.com/llvm/llvm-project/pull/109614.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 73d218cdc7ac27..82dcd513240a5f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -922,6 +922,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
+ case VPRecipeBase::VPWidenSelectEVLSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
case VPRecipeBase::VPCanonicalIVPHISC:
@@ -1689,10 +1690,17 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
/// A recipe for widening select instructions.
struct VPWidenSelectRecipe : public VPSingleDefRecipe {
+
+protected:
+ template <typename IterT>
+ VPWidenSelectRecipe(unsigned VPDefOpcode, SelectInst &I,
+ iterator_range<IterT> Operands)
+ : VPSingleDefRecipe(VPDefOpcode, Operands, &I, I.getDebugLoc()) {}
+
+public:
template <typename IterT>
VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
- : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
- I.getDebugLoc()) {}
+ : VPWidenSelectRecipe(VPDef::VPWidenSelectSC, I, Operands) {}
~VPWidenSelectRecipe() override = default;
@@ -1701,7 +1709,15 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
operands());
}
- VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenSelectEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Produce a widened version of the select instruction.
void execute(VPTransformState &State) override;
@@ -1721,6 +1737,52 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
}
};
+// A recipe for widening select instruction with vector-predication intrinsics
+// with explicit vector length (EVL).
+struct VPWidenSelectEVLRecipe : public VPWidenSelectRecipe {
+
+ template <typename IterT>
+ VPWidenSelectEVLRecipe(SelectInst &I, iterator_range<IterT> Operands,
+ VPValue &EVL)
+ : VPWidenSelectRecipe(VPDef::VPWidenSelectEVLSC, I, Operands) {
+ addOperand(&EVL);
+ }
+
+ VPWidenSelectEVLRecipe(VPWidenSelectRecipe &W, VPValue &EVL)
+ : VPWidenSelectEVLRecipe(*cast<SelectInst>(W.getUnderlyingInstr()),
+ W.operands(), EVL) {}
+
+ ~VPWidenSelectEVLRecipe() override = default;
+
+ VPWidenSelectEVLRecipe *clone() final {
+ llvm_unreachable("VPWidenSelectEVLRecipe cannot be cloned");
+ return nullptr;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenSelectEVLSC)
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// Produce a vp-intrinsic version of the select instruction.
+ void execute(VPTransformState &State) final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // EVL in that recipe is always the last operand, thus any use before means
+ // the VPValue should be vectorized.
+ return getEVL() == Op;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
/// A recipe for handling GEP instructions.
class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
bool isPointerLoopInvariant() const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a2cfbc35cb84f..cc6797cb66c094 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -76,7 +76,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectEVLSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -117,7 +118,8 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectEVLSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -168,7 +170,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenEVLSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectEVLSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -1060,6 +1063,21 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
getOperand(2)->printAsOperand(O, SlotTracker);
O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
+
+void VPWidenSelectEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-SELECT ";
+ printAsOperand(O, SlotTracker);
+ O << " = vp.select ";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(1)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(2)->printAsOperand(O, SlotTracker);
+ O << ", ";
+ getOperand(3)->printAsOperand(O, SlotTracker);
+ O << (isInvariantCond() ? " (condition is loop invariant)" : "");
+}
#endif
void VPWidenSelectRecipe::execute(VPTransformState &State) {
@@ -1082,6 +1100,36 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
}
}
+void VPWidenSelectEVLRecipe::execute(VPTransformState &State) {
+ State.setDebugLocFrom(getDebugLoc());
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+
+ Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Builder.setEVL(EVLArg);
+ // The condition can be loop invariant but still defined inside the
+ // loop. This means that we can't just use the original 'cond' value.
+ // We have to take the 'vectorized' value and pick the first lane.
+ // Instcombine will make this a no-op.
+ auto *InvarCond =
+ isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
+
+ Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0);
+ if (!isa<VectorType>(Cond->getType())) {
+ Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond");
+ }
+
+ Value *Op0 = State.get(getOperand(1), 0);
+ Value *Op1 = State.get(getOperand(2), 0);
+ Value *VPInst = Builder.createVectorInstruction(
+ Instruction::Select, Op0->getType(), {Cond, Op0, Op1}, "vp.select");
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+}
+
VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
const FastMathFlags &FMF) {
AllowReassoc = FMF.allowReassoc();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index edcd7d26e60daa..8a284fdc4c2cd4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1344,6 +1344,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
+ .Case<VPWidenSelectRecipe>(
+ [&](VPWidenSelectRecipe *W) -> VPRecipeBase * {
+ return new VPWidenSelectEVLRecipe(*W, EVL);
+ })
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index a47ce61e28c50b..6bb51e7430ce03 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -359,6 +359,7 @@ class VPDef {
VPWidenSC,
VPWidenEVLSC,
VPWidenSelectSC,
+ VPWidenSelectEVLSC,
VPBlendSC,
// START: Phi-like recipes. Need to be kept together.
VPWidenPHISC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 99bc4c38a3c3cd..086d5d6ba24453 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -148,6 +148,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
return VerifyEVLUse(
*W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
+ .Case<VPWidenSelectEVLRecipe>(
+ [&](const VPWidenSelectEVLRecipe *S) {
+ return VerifyEVLUse(*S, 3);
+ })
.Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
return VerifyEVLUse(*R, 2);
})
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 41796e848632e4..fc12dd54f88dfa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -70,7 +70,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
+; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
new file mode 100644
index 00000000000000..62e0d3e58092df
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -0,0 +1,101 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6>
+; IF-EVL-NEXT: vp<%8> = vector-pointer ir<%arrayidx3>
+; IF-EVL-NEXT: WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
+; IF-EVL-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
+; IF-EVL-NEXT: WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5>
+; IF-EVL-NEXT: WIDEN-SELECT ir<%cond.p> = vp.select ir<%cmp4>, ir<%1>, ir<%2>, vp<%5>
+; IF-EVL-NEXT: WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5>
+; IF-EVL-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx15>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, ir<%cond>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx3>
+; NO-VP-NEXT: WIDEN ir<%1> = load vp<%5>
+; NO-VP-NEXT: WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
+; NO-VP-NEXT: WIDEN ir<%2> = sub ir<0>, ir<%1>
+; NO-VP-NEXT: WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2>
+; NO-VP-NEXT: WIDEN ir<%cond> = add ir<%cond.p>, ir<%0>
+; NO-VP-NEXT: CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%6> = vector-pointer ir<%arrayidx15>
+; NO-VP-NEXT: WIDEN store vp<%6>, ir<%cond>
+; NO-VP-NEXT: EMIT vp<%7> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%7>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+
+entry:
+ %cmp30 = icmp sgt i64 %N, 0
+ br i1 %cmp30, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+ %1 = load i32, ptr %arrayidx3, align 4
+ %cmp4 = icmp sgt i32 %0, %1
+ %2 = sub i32 0, %1
+ %cond.p = select i1 %cmp4, i32 %1, i32 %2
+ %cond = add i32 %cond.p, %0
+ %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+ store i32 %cond, ptr %arrayidx15, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
|
|
||
Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0); | ||
if (!isa<VectorType>(Cond->getType())) { | ||
Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you have a test for this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I didn't construct such a test case, but adding conditions may be safer
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Need a test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I tried to wirte such a test case, but it didn't sucess, so I will remove this condition for now.
790bbf6
to
eba26a7
Compare
@@ -1116,9 +1116,8 @@ void VPWidenSelectEVLRecipe::execute(VPTransformState &State) { | |||
auto *InvarCond = | |||
isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr; | |||
|
|||
// FIXME: Do we have a scenario where cond is a scalar? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to add assertion here
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I might be missing something, but can't this happen when the condition is a loop invariant value? If so, this should have a. test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added assertion
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with you. I tried to construct a test case from C that would satisfy this, but it didn't sucess, so I added an assertion.
55f67be
to
8b6eb58
Compare
Any other questions? Can I merge it ? @fhahn @alexey-bataev |
8b6eb58
to
0053b2a
Compare
I am wondering if there's a better way to avoid duplicating all or most recipes for EVL. Would it be sufficient to create recipes to widen the appropriate VPIntrinsic directly at least for some cases? This would need allowing creating widened intrinsic recipes without underlying call instruction or function object (#110486). Then introducing vp.select (and others) can be added with a few lines like in #110489 |
I’ve also been seriously thinking about this recently. But my thinking is whether it’s really necessary to create an EVL version for each recipe, especially after noticing VLOpt #108640. I think maybe we only need to create EVL recipes for a small subset of recipes, such as those related to memory accesses (load, store, interleaved accesses), in-loop reduction... etc. IIUC, VLOpt should have the ability to prune the VL.
In conclusion, we only need to transforms the partial recipes into EVL recipes. As for which ones exactly, we can determine by whether the recipe has a mask operand to decide if an EVL recipe is necessary. What do you think? |
If possible, I think it is better to avoid creating EVL that is larger than required. The first reason is that the VLOpt pass is not very mature and may miss some cases. The original intention of the pass is to fix the case introduced by vector GEP which has no EVL based version. In that case, there was no way for the compiler to use the correct VL at the IR level which makes that pass necessary. But if the compiler can avoid introducing larger than needed EVL, that is preferred, since it will create less work for the VLOpt pass, which will improve compiler times. |
+1 |
If triple count is not a constant, can it be directly introduced into EVL? |
No description provided.