[VP][EVL] Support select instruction with EVL-vectorization #109614

Closed
LiqinWeng wants to merge 3 commits from the widen-vp-select-with-evl branch

Conversation

LiqinWeng
Contributor

No description provided.

@llvmbot
Member

llvmbot commented Sep 23, 2024

@llvm/pr-subscribers-llvm-transforms

Author: LiqinWeng (LiqinWeng)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/109614.diff

7 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+65-3)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+51-3)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+4)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp (+4)
  • (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll (+1-1)
  • (added) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll (+101)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 73d218cdc7ac27..82dcd513240a5f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -922,6 +922,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
     case VPRecipeBase::VPWidenSC:
     case VPRecipeBase::VPWidenEVLSC:
     case VPRecipeBase::VPWidenSelectSC:
+    case VPRecipeBase::VPWidenSelectEVLSC:
     case VPRecipeBase::VPBlendSC:
     case VPRecipeBase::VPPredInstPHISC:
     case VPRecipeBase::VPCanonicalIVPHISC:
@@ -1689,10 +1690,17 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
 
 /// A recipe for widening select instructions.
 struct VPWidenSelectRecipe : public VPSingleDefRecipe {
+
+protected:
+  template <typename IterT>
+  VPWidenSelectRecipe(unsigned VPDefOpcode, SelectInst &I,
+                      iterator_range<IterT> Operands)
+      : VPSingleDefRecipe(VPDefOpcode, Operands, &I, I.getDebugLoc()) {}
+
+public:
   template <typename IterT>
   VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
-      : VPSingleDefRecipe(VPDef::VPWidenSelectSC, Operands, &I,
-                          I.getDebugLoc()) {}
+      : VPWidenSelectRecipe(VPDef::VPWidenSelectSC, I, Operands) {}
 
   ~VPWidenSelectRecipe() override = default;
 
@@ -1701,7 +1709,15 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
                                    operands());
   }
 
-  VP_CLASSOF_IMPL(VPDef::VPWidenSelectSC)
+  static inline bool classof(const VPRecipeBase *R) {
+    return R->getVPDefID() == VPRecipeBase::VPWidenSelectSC ||
+           R->getVPDefID() == VPRecipeBase::VPWidenSelectEVLSC;
+  }
+
+  static inline bool classof(const VPUser *U) {
+    auto *R = dyn_cast<VPRecipeBase>(U);
+    return R && classof(R);
+  }
 
   /// Produce a widened version of the select instruction.
   void execute(VPTransformState &State) override;
@@ -1721,6 +1737,52 @@ struct VPWidenSelectRecipe : public VPSingleDefRecipe {
   }
 };
 
+// A recipe for widening select instruction with vector-predication intrinsics
+// with explicit vector length (EVL).
+struct VPWidenSelectEVLRecipe : public VPWidenSelectRecipe {
+
+  template <typename IterT>
+  VPWidenSelectEVLRecipe(SelectInst &I, iterator_range<IterT> Operands,
+                         VPValue &EVL)
+      : VPWidenSelectRecipe(VPDef::VPWidenSelectEVLSC, I, Operands) {
+    addOperand(&EVL);
+  }
+
+  VPWidenSelectEVLRecipe(VPWidenSelectRecipe &W, VPValue &EVL)
+      : VPWidenSelectEVLRecipe(*cast<SelectInst>(W.getUnderlyingInstr()),
+                               W.operands(), EVL) {}
+
+  ~VPWidenSelectEVLRecipe() override = default;
+
+  VPWidenSelectEVLRecipe *clone() final {
+    llvm_unreachable("VPWidenSelectEVLRecipe cannot be cloned");
+    return nullptr;
+  }
+
+  VP_CLASSOF_IMPL(VPDef::VPWidenSelectEVLSC)
+
+  VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+  const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+  /// Produce a vp-intrinsic version of the select instruction.
+  void execute(VPTransformState &State) final;
+
+  /// Returns true if the recipe only uses the first lane of operand \p Op.
+  bool onlyFirstLaneUsed(const VPValue *Op) const override {
+    assert(is_contained(operands(), Op) &&
+           "Op must be an operand of the recipe");
+    // EVL in that recipe is always the last operand, thus any use before means
+    // the VPValue should be vectorized.
+    return getEVL() == Op;
+  }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
 /// A recipe for handling GEP instructions.
 class VPWidenGEPRecipe : public VPRecipeWithIRFlags {
   bool isPointerLoopInvariant() const {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 9a2cfbc35cb84f..cc6797cb66c094 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -76,7 +76,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
   case VPWidenPHISC:
   case VPWidenSC:
   case VPWidenEVLSC:
-  case VPWidenSelectSC: {
+  case VPWidenSelectSC:
+  case VPWidenSelectEVLSC: {
     const Instruction *I =
         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
     (void)I;
@@ -117,7 +118,8 @@ bool VPRecipeBase::mayReadFromMemory() const {
   case VPWidenPHISC:
   case VPWidenSC:
   case VPWidenEVLSC:
-  case VPWidenSelectSC: {
+  case VPWidenSelectSC:
+  case VPWidenSelectEVLSC: {
     const Instruction *I =
         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
     (void)I;
@@ -168,7 +170,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
   case VPWidenPointerInductionSC:
   case VPWidenSC:
   case VPWidenEVLSC:
-  case VPWidenSelectSC: {
+  case VPWidenSelectSC:
+  case VPWidenSelectEVLSC: {
     const Instruction *I =
         dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
     (void)I;
@@ -1060,6 +1063,21 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
   getOperand(2)->printAsOperand(O, SlotTracker);
   O << (isInvariantCond() ? " (condition is loop invariant)" : "");
 }
+
+void VPWidenSelectEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+                                   VPSlotTracker &SlotTracker) const {
+  O << Indent << "WIDEN-SELECT ";
+  printAsOperand(O, SlotTracker);
+  O << " = vp.select ";
+  getOperand(0)->printAsOperand(O, SlotTracker);
+  O << ", ";
+  getOperand(1)->printAsOperand(O, SlotTracker);
+  O << ", ";
+  getOperand(2)->printAsOperand(O, SlotTracker);
+  O << ", ";
+  getOperand(3)->printAsOperand(O, SlotTracker);
+  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
+}
 #endif
 
 void VPWidenSelectRecipe::execute(VPTransformState &State) {
@@ -1082,6 +1100,36 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
   }
 }
 
+void VPWidenSelectEVLRecipe::execute(VPTransformState &State) {
+  State.setDebugLocFrom(getDebugLoc());
+  assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+                          "explicit vector length.");
+
+  Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
+  IRBuilderBase &BuilderIR = State.Builder;
+  VectorBuilder Builder(BuilderIR);
+  Builder.setEVL(EVLArg);
+  // The condition can be loop invariant but still defined inside the
+  // loop. This means that we can't just use the original 'cond' value.
+  // We have to take the 'vectorized' value and pick the first lane.
+  // Instcombine will make this a no-op.
+  auto *InvarCond =
+      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
+
+  Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0);
+  if (!isa<VectorType>(Cond->getType())) {
+    Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond");
+  }
+
+  Value *Op0 = State.get(getOperand(1), 0);
+  Value *Op1 = State.get(getOperand(2), 0);
+  Value *VPInst = Builder.createVectorInstruction(
+      Instruction::Select, Op0->getType(), {Cond, Op0, Op1}, "vp.select");
+  State.set(this, VPInst, 0);
+  State.addMetadata(VPInst,
+                    dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+}
+
 VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
     const FastMathFlags &FMF) {
   AllowReassoc = FMF.allowReassoc();
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index edcd7d26e60daa..8a284fdc4c2cd4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1344,6 +1344,10 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
                   return nullptr;
                 return new VPWidenEVLRecipe(*W, EVL);
               })
+              .Case<VPWidenSelectRecipe>(
+                  [&](VPWidenSelectRecipe *W) -> VPRecipeBase * {
+                    return new VPWidenSelectEVLRecipe(*W, EVL);
+                  })
               .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
                 VPValue *NewMask = GetNewMask(Red->getCondOp());
                 return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index a47ce61e28c50b..6bb51e7430ce03 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -359,6 +359,7 @@ class VPDef {
     VPWidenSC,
     VPWidenEVLSC,
     VPWidenSelectSC,
+    VPWidenSelectEVLSC,
     VPBlendSC,
     // START: Phi-like recipes. Need to be kept together.
     VPWidenPHISC,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 99bc4c38a3c3cd..086d5d6ba24453 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -148,6 +148,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
                return VerifyEVLUse(
                    *W, Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
              })
+             .Case<VPWidenSelectEVLRecipe>(
+                 [&](const VPWidenSelectEVLRecipe *S) {
+                   return VerifyEVLUse(*S, 3);
+                 })
              .Case<VPReductionEVLRecipe>([&](const VPReductionEVLRecipe *R) {
                return VerifyEVLUse(*R, 2);
              })
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 41796e848632e4..fc12dd54f88dfa 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -70,7 +70,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
 ; IF-EVL-INLOOP-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
 ; IF-EVL-INLOOP-NEXT:    [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT:    [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; IF-EVL-INLOOP-NEXT:    [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
+; IF-EVL-INLOOP-NEXT:    [[TMP20:%.*]] = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT:    [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
 ; IF-EVL-INLOOP-NEXT:    [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
 ; IF-EVL-INLOOP-NEXT:    [[TMP23:%.*]] = zext i32 [[TMP12]] to i64
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
new file mode 100644
index 00000000000000..62e0d3e58092df
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-select-intrinsics.ll
@@ -0,0 +1,101 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT:   vector.body:
+; IF-EVL-NEXT:     EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT:     EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT:     EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT:     vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT:     vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT:     WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT:     CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%6>
+; IF-EVL-NEXT:     vp<%8> = vector-pointer ir<%arrayidx3>
+; IF-EVL-NEXT:     WIDEN ir<%1> = vp.load vp<%8>, vp<%5>
+; IF-EVL-NEXT:     WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
+; IF-EVL-NEXT:     WIDEN ir<%2> = vp.sub ir<0>, ir<%1>, vp<%5>
+; IF-EVL-NEXT:     WIDEN-SELECT ir<%cond.p> = vp.select ir<%cmp4>, ir<%1>, ir<%2>, vp<%5>
+; IF-EVL-NEXT:     WIDEN ir<%cond> = vp.add ir<%cond.p>, ir<%0>, vp<%5>
+; IF-EVL-NEXT:     CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT:     vp<%9> = vector-pointer ir<%arrayidx15>
+; IF-EVL-NEXT:     WIDEN vp.store vp<%9>, ir<%cond>, vp<%5>
+; IF-EVL-NEXT:     SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT:     EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT:     EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT:     EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT:   No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT:   vector.body:
+; NO-VP-NEXT:     EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%7>
+; NO-VP-NEXT:     vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT:     vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT:     WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT:     CLONE ir<%arrayidx3> = getelementptr inbounds ir<%c>, vp<%3>
+; NO-VP-NEXT:     vp<%5> = vector-pointer ir<%arrayidx3>
+; NO-VP-NEXT:     WIDEN ir<%1> = load vp<%5>
+; NO-VP-NEXT:     WIDEN ir<%cmp4> = icmp sgt ir<%0>, ir<%1>
+; NO-VP-NEXT:     WIDEN ir<%2> = sub ir<0>, ir<%1>
+; NO-VP-NEXT:     WIDEN-SELECT ir<%cond.p> = select ir<%cmp4>, ir<%1>, ir<%2>
+; NO-VP-NEXT:     WIDEN ir<%cond> = add ir<%cond.p>, ir<%0>
+; NO-VP-NEXT:     CLONE ir<%arrayidx15> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT:     vp<%6> = vector-pointer ir<%arrayidx15>
+; NO-VP-NEXT:     WIDEN store vp<%6>, ir<%cond>
+; NO-VP-NEXT:     EMIT vp<%7> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT:     EMIT branch-on-count vp<%7>, vp<%1>
+; NO-VP-NEXT:   No successors
+; NO-VP-NEXT: }
+
+
+entry:
+  %cmp30 = icmp sgt i64 %N, 0
+  br i1 %cmp30, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.body:
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
+  %1 = load i32, ptr %arrayidx3, align 4
+  %cmp4 = icmp sgt i32 %0, %1
+  %2 = sub i32 0, %1
+  %cond.p = select i1 %cmp4, i32 %1, i32 %2
+  %cond = add i32 %cond.p, %0
+  %arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
+  store i32 %cond, ptr %arrayidx15, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}

LiqinWeng requested a review from Mel-Chen on September 23, 2024 10:49

Value *Cond = InvarCond ? InvarCond : State.get(getCond(), 0);
if (!isa<VectorType>(Cond->getType())) {
Cond = BuilderIR.CreateVectorSplat(State.VF, Cond, "splat.cond");
Member

Do you have a test for this?

Contributor Author

Sorry, I didn't construct such a test case, but adding the condition may be safer.

Member

Need a test

Contributor Author

Sorry, I tried to write such a test case, but it didn't succeed, so I will remove this condition for now.

LiqinWeng force-pushed the widen-vp-select-with-evl branch from 790bbf6 to eba26a7 on September 24, 2024 04:20
@@ -1116,9 +1116,8 @@ void VPWidenSelectEVLRecipe::execute(VPTransformState &State) {
auto *InvarCond =
isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

// FIXME: Do we have a scenario where cond is a scalar?
Member

Better to add an assertion here.

Contributor

I might be missing something, but can't this happen when the condition is a loop-invariant value? If so, this should have a test.

Contributor Author

Added an assertion.

Contributor Author

I agree with you. I tried to construct a test case from C that would satisfy this, but it didn't succeed, so I added an assertion.

LiqinWeng force-pushed the widen-vp-select-with-evl branch 3 times, most recently from 55f67be to 8b6eb58, on September 27, 2024 08:29
@LiqinWeng
Contributor Author

Any other questions? Can I merge it? @fhahn @alexey-bataev

LiqinWeng force-pushed the widen-vp-select-with-evl branch from 8b6eb58 to 0053b2a on September 30, 2024 03:24
@fhahn
Contributor

fhahn commented Sep 30, 2024

I am wondering if there's a better way to avoid duplicating all or most recipes for EVL.

Would it be sufficient to create recipes to widen the appropriate VPIntrinsic directly, at least for some cases? This would require allowing widened intrinsic recipes to be created without an underlying call instruction or function object (#110486). Then introducing vp.select (and others) could be done with a few lines, as in #110489.
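
For reference, a minimal standalone sketch of the vp.select form involved here, matching the llvm.vp.select call in the updated cond-reduction test above (the function and value names below are placeholders, not taken from the patch):

declare <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)

define <vscale x 4 x i32> @select_sketch(<vscale x 4 x i1> %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl) {
  ; Lanes below %evl behave like a plain select; lanes at or above %evl are poison.
  %r = call <vscale x 4 x i32> @llvm.vp.select.nxv4i32(<vscale x 4 x i1> %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %evl)
  ret <vscale x 4 x i32> %r
}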

@Mel-Chen
Contributor

Mel-Chen commented Oct 7, 2024

I am wondering if there's a better way to avoid duplicating all or most recipes for EVL.

I’ve also been thinking about this seriously recently, but what I’ve been wondering is whether it’s really necessary to create an EVL version of each recipe, especially after noticing VLOpt #108640.

I think we may only need to create EVL recipes for a small subset of recipes, such as those related to memory accesses (load, store, interleaved accesses), in-loop reductions, etc. IIUC, VLOpt should be able to prune the VL.
There are two pros to this approach:

  1. Minimizing the number of added recipes.
  2. We can reuse existing optimizations, such as InstCombine. For example, if there is a combine rule for add <vec1>, <vec2>, we can share that rule without needing to add a similar combine rule for vp.add (see the sketch below).

In conclusion, we only need to transform a subset of the recipes into EVL recipes. As for which ones exactly, whether a recipe has a mask operand can be used to decide if an EVL recipe is necessary.

What do you think?
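
As a hedged illustration of point 2 (the IR below is a hypothetical example, not taken from this patch): an existing fold such as add x, 0 applies directly to the plain instruction, while the predicated form would generally need its own rule for vp.add.

declare <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32)

define <vscale x 4 x i32> @plain_add(<vscale x 4 x i32> %a) {
  ; InstCombine already folds add %a, 0 into %a.
  %x = add <vscale x 4 x i32> %a, zeroinitializer
  ret <vscale x 4 x i32> %x
}

define <vscale x 4 x i32> @predicated_add(<vscale x 4 x i32> %a, <vscale x 4 x i1> %m, i32 %evl) {
  ; The same simplification does not automatically carry over to the predicated
  ; intrinsic form, so it would have to be taught separately for vp.add.
  %y = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i32> %y
}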

@michaelmaitland
Contributor

I’ve also been thinking about this seriously recently, but what I’ve been wondering is whether it’s really necessary to create an EVL version of each recipe, especially after noticing VLOpt #108640.

If possible, I think it is better to avoid creating an EVL that is larger than required. The first reason is that the VLOpt pass is not very mature and may miss some cases. The original intention of that pass was to fix the case introduced by vector GEP, which has no EVL-based version; in that case there was no way for the compiler to use the correct VL at the IR level, which makes the pass necessary. But if the compiler can avoid introducing a larger-than-needed EVL, that is preferred, since it creates less work for the VLOpt pass and improves compile times.

@alexey-bataev
Member

+1

@LiqinWeng
Contributor Author

In conclusion, we only need to transform a subset of the recipes into EVL recipes. As for which ones exactly, whether a recipe has a mask operand can be used to decide if an EVL recipe is necessary.

If the trip count is not a constant, can it be directly introduced into EVL?
