Skip to content

Commit d40ce49

Browse files
committed
[VPlan] Use VPWidenIntrinsicRecipe to create vp.select.
Use VPWidenIntrinsicRecipe (#110486) to create vp.select intrinsics. This potentially offers an alternative to duplicating EVL recipes for all existing recipes. Some recipes will still need duplicates (at least at the moment) due to extra code-gen needs (e.g. widening loads and stores). But in cases where the intrinsic can be used directly, creating the widened intrinsic directly would reduce the need to duplicate some recipes. NOTE: this PR contains the changes from #110486. The relevant changes are in 47135542e2ada3f21b215bf237d8442a56c8456c.
1 parent f9c9f33 commit d40ce49

File tree

6 files changed

+92
-0
lines changed

6 files changed

+92
-0
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1651,6 +1651,8 @@ class VPWidenIntrinsicRecipe : public VPSingleDefRecipe {
16511651
void print(raw_ostream &O, const Twine &Indent,
16521652
VPSlotTracker &SlotTracker) const override;
16531653
#endif
1654+
1655+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
16541656
};
16551657

16561658
/// A recipe for widening Call instructions using library calls.

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
6161
case Instruction::ICmp:
6262
case VPInstruction::ActiveLaneMask:
6363
return inferScalarType(R->getOperand(1));
64+
case VPInstruction::ExplicitVectorLength:
65+
return Type::getIntNTy(Ctx, 32);
6466
case VPInstruction::FirstOrderRecurrenceSplice:
6567
case VPInstruction::Not:
6668
return SetResultTyFromOp();

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1042,6 +1042,14 @@ StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
10421042
return Intrinsic::getBaseName(VectorIntrinsicID);
10431043
}
10441044

1045+
bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1046+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1047+
// Vector predication intrinsics only demand the first lane of the last
1048+
// operand (the EVL operand).
1049+
return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
1050+
Op == getOperand(getNumOperands() - 1);
1051+
}
1052+
10451053
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
10461054
void VPWidenIntrinsicRecipe::print(raw_ostream &O, const Twine &Indent,
10471055
VPSlotTracker &SlotTracker) const {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1353,6 +1353,7 @@ void VPlanTransforms::addActiveLaneMask(
13531353
/// Replace recipes with their EVL variants.
13541354
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
13551355
SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
1356+
VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
13561357
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
13571358
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
13581359
auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
@@ -1384,6 +1385,16 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
13841385
VPValue *NewMask = GetNewMask(Red->getCondOp());
13851386
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
13861387
})
1388+
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
1389+
SmallVector<VPValue *> Ops(Sel->operands());
1390+
Ops.push_back(&EVL);
1391+
return new VPWidenIntrinsicRecipe(
1392+
Sel->getUnderlyingValue(), Intrinsic::vp_select,
1393+
make_range(Ops.begin(), Ops.end()),
1394+
1395+
TypeInfo.inferScalarType(Sel));
1396+
})
1397+
13871398
.Default([&](VPRecipeBase *R) { return nullptr; });
13881399

13891400
if (!NewRecipe)

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,10 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
138138
};
139139
for (const VPUser *U : EVL.users()) {
140140
if (!TypeSwitch<const VPUser *, bool>(U)
141+
.Case<VPWidenIntrinsicRecipe>(
142+
[&](const VPWidenIntrinsicRecipe *S) {
143+
return VerifyEVLUse(*S, S->getNumOperands() - 1);
144+
})
141145
.Case<VPWidenStoreEVLRecipe>([&](const VPWidenStoreEVLRecipe *S) {
142146
return VerifyEVLUse(*S, 2);
143147
})
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; REQUIRES: asserts
2+
3+
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
4+
; RUN: -force-tail-folding-style=data-with-evl \
5+
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
6+
; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
7+
8+
define void @vp_select(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
9+
; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' {
10+
; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF
11+
; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count
12+
; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
13+
14+
; IF-EVL: vector.ph:
15+
; IF-EVL-NEXT: Successor(s): vector loop
16+
17+
; IF-EVL: <x1> vector loop: {
18+
; IF-EVL-NEXT: vector.body:
19+
; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION
20+
; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEX:%[0-9]+]]>
21+
; IF-EVL-NEXT: EMIT vp<[[AVL:%.+]]> = sub ir<%N>, vp<[[EVL_PHI]]>
22+
; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[AVL]]>
23+
; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1>
24+
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
25+
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
26+
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
27+
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
28+
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
29+
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
30+
; IF-EVL-NEXT: WIDEN ir<[[CMP:%.+]]> = icmp sgt ir<[[LD1]]>, ir<[[LD2]]>
31+
; IF-EVL-NEXT: WIDEN ir<[[SUB:%.+]]> = vp.sub ir<0>, ir<[[LD2]]>, vp<[[EVL]]>
32+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SELECT:%.+]]> = llvm.vp.select(ir<[[CMP]]>, ir<%1>, ir<%2>, vp<[[EVL]]>)
33+
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = vp.add ir<[[SELECT]]>, ir<[[LD1]]>, vp<[[EVL]]>
34+
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
35+
; IF-EVL-NEXT: vp<[[PTR3:%.+]]> = vector-pointer ir<[[GEP3]]>
36+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[EVL]]>
37+
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
38+
; IF-EVL-NEXT: EMIT vp<[[IV_NEX]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
39+
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
40+
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
41+
; IF-EVL-NEXT: No successors
42+
; IF-EVL-NEXT: }
43+
44+
entry:
45+
br label %for.body
46+
47+
for.body:
48+
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
49+
%arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
50+
%0 = load i32, ptr %arrayidx, align 4
51+
%arrayidx3 = getelementptr inbounds i32, ptr %c, i64 %indvars.iv
52+
%1 = load i32, ptr %arrayidx3, align 4
53+
%cmp4 = icmp sgt i32 %0, %1
54+
%2 = sub i32 0, %1
55+
%cond.p = select i1 %cmp4, i32 %1, i32 %2
56+
%cond = add i32 %cond.p, %0
57+
%arrayidx15 = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
58+
store i32 %cond, ptr %arrayidx15, align 4
59+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
60+
%exitcond.not = icmp eq i64 %indvars.iv.next, %N
61+
br i1 %exitcond.not, label %exit, label %for.body
62+
63+
exit:
64+
ret void
65+
}

0 commit comments

Comments
 (0)