Skip to content

Commit efe84eb

Browse files
committed
[VPlan] Remove VPBlendRecipe, replace with select VPInstructions
1 parent 1debf23 commit efe84eb

17 files changed

+53
-415
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4081,7 +4081,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
40814081
case VPDef::VPWidenIntrinsicSC:
40824082
case VPDef::VPWidenSC:
40834083
case VPDef::VPWidenSelectSC:
4084-
case VPDef::VPBlendSC:
40854084
case VPDef::VPFirstOrderRecurrencePHISC:
40864085
case VPDef::VPHistogramSC:
40874086
case VPDef::VPWidenPHISC:
@@ -4203,10 +4202,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
42034202
if (!VPI)
42044203
continue;
42054204
switch (VPI->getOpcode()) {
4206-
// Selects are only modelled in the legacy cost model for safe
4207-
// divisors.
42084205
case Instruction::Select: {
42094206
VPValue *VPV = VPI->getVPSingleValue();
4207+
// Blend selects are modelled in VPlan.
4208+
if (isa_and_nonnull<PHINode>(VPV->getUnderlyingValue()))
4209+
continue;
4210+
// Selects are only modelled in the legacy cost model for safe
4211+
// divisors.
42104212
if (VPV->getNumUsers() == 1) {
42114213
if (auto *WR = dyn_cast<VPWidenRecipe>(*VPV->user_begin())) {
42124214
switch (WR->getOpcode()) {
@@ -8656,9 +8658,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
86568658
// latter are added above for masking.
86578659
// FIXME: Migrate code relying on the underlying instruction from VPlan0
86588660
// to construct recipes below to not use the underlying instruction.
8659-
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
8660-
&R) ||
8661-
(isa<VPInstruction>(&R) && !UnderlyingValue))
8661+
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) ||
8662+
(isa<VPInstruction>(&R) && !UnderlyingValue) ||
8663+
(match(&R, m_VPInstruction<Instruction::Select>(
8664+
m_VPValue(), m_VPValue(), m_VPValue())) &&
8665+
isa_and_nonnull<PHINode>(UnderlyingValue)))
86628666
continue;
86638667

86648668
// FIXME: VPlan0, which models a copy of the original scalar loop, should
@@ -8944,20 +8948,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
89448948
// the phi until LoopExitValue. We keep track of the previous item
89458949
// (PreviousLink) to tell which of the two operands of a Link will remain
89468950
// scalar and which will be reduced. For minmax by select(cmp), Link will be
8947-
// the select instructions. Blend recipes of in-loop reduction phi's will
8951+
// the select instructions. Blend selects of in-loop reduction phi's will
89488952
// get folded to their non-phi operand, as the reduction recipe handles the
89498953
// condition directly.
89508954
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
89518955
for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
8952-
if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
8953-
assert(Blend->getNumIncomingValues() == 2 &&
8954-
"Blend must have 2 incoming values");
8955-
if (Blend->getIncomingValue(0) == PhiR) {
8956-
Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
8956+
using namespace VPlanPatternMatch;
8957+
VPValue *T, *F;
8958+
if (match(CurrentLink, m_VPInstruction<Instruction::Select>(
8959+
m_VPValue(), m_VPValue(T), m_VPValue(F)))) {
8960+
if (T == PhiR) {
8961+
CurrentLink->replaceAllUsesWith(F);
89578962
} else {
8958-
assert(Blend->getIncomingValue(1) == PhiR &&
8959-
"PhiR must be an operand of the blend");
8960-
Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
8963+
assert(F == PhiR && "PhiR must be an operand of the select");
8964+
CurrentLink->replaceAllUsesWith(T);
89618965
}
89628966
continue;
89638967
}

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 0 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
545545
case VPRecipeBase::VPWidenIntrinsicSC:
546546
case VPRecipeBase::VPWidenSC:
547547
case VPRecipeBase::VPWidenSelectSC:
548-
case VPRecipeBase::VPBlendSC:
549548
case VPRecipeBase::VPPredInstPHISC:
550549
case VPRecipeBase::VPCanonicalIVPHISC:
551550
case VPRecipeBase::VPActiveLaneMaskPHISC:
@@ -2369,72 +2368,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
23692368
}
23702369
};
23712370

2372-
/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2373-
/// instructions.
2374-
class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
2375-
public:
2376-
/// The blend operation is a User of the incoming values and of their
2377-
/// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2378-
/// be omitted (implied by passing an odd number of operands) in which case
2379-
/// all other incoming values are merged into it.
2380-
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
2381-
: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
2382-
assert(Operands.size() > 0 && "Expected at least one operand!");
2383-
}
2384-
2385-
VPBlendRecipe *clone() override {
2386-
return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()),
2387-
operands(), getDebugLoc());
2388-
}
2389-
2390-
VP_CLASSOF_IMPL(VPDef::VPBlendSC)
2391-
2392-
/// A normalized blend is one that has an odd number of operands, whereby the
2393-
/// first operand does not have an associated mask.
2394-
bool isNormalized() const { return getNumOperands() % 2; }
2395-
2396-
/// Return the number of incoming values, taking into account when normalized
2397-
/// the first incoming value will have no mask.
2398-
unsigned getNumIncomingValues() const {
2399-
return (getNumOperands() + isNormalized()) / 2;
2400-
}
2401-
2402-
/// Return incoming value number \p Idx.
2403-
VPValue *getIncomingValue(unsigned Idx) const {
2404-
return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
2405-
}
2406-
2407-
/// Return mask number \p Idx.
2408-
VPValue *getMask(unsigned Idx) const {
2409-
assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
2410-
return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
2411-
}
2412-
2413-
void execute(VPTransformState &State) override {
2414-
llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
2415-
}
2416-
2417-
/// Return the cost of this VPBlendRecipe.
2418-
InstructionCost computeCost(ElementCount VF,
2419-
VPCostContext &Ctx) const override;
2420-
2421-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2422-
/// Print the recipe.
2423-
void print(raw_ostream &O, const Twine &Indent,
2424-
VPSlotTracker &SlotTracker) const override;
2425-
#endif
2426-
2427-
/// Returns true if the recipe only uses the first lane of operand \p Op.
2428-
bool onlyFirstLaneUsed(const VPValue *Op) const override {
2429-
assert(is_contained(operands(), Op) &&
2430-
"Op must be an operand of the recipe");
2431-
// Recursing through Blend recipes only, must terminate at header phi's the
2432-
// latest.
2433-
return all_of(users(),
2434-
[this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
2435-
}
2436-
};
2437-
24382371
/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
24392372
/// or stores into one wide load/store and shuffles. The first operand of a
24402373
/// VPInterleave recipe is the address, followed by the stored values, followed

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,6 @@ VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
4040
CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
4141
}
4242

43-
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
44-
Type *ResTy = inferScalarType(R->getIncomingValue(0));
45-
for (unsigned I = 1, E = R->getNumIncomingValues(); I != E; ++I) {
46-
VPValue *Inc = R->getIncomingValue(I);
47-
assert(inferScalarType(Inc) == ResTy &&
48-
"different types inferred for different incoming values");
49-
CachedTypes[Inc] = ResTy;
50-
}
51-
return ResTy;
52-
}
53-
5443
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
5544
// Set the result type from the first operand, check if the types for all
5645
// other operands match and cache them.
@@ -293,7 +282,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
293282
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
294283
VPWidenCastRecipe>(
295284
[](const auto *R) { return R->getResultType(); })
296-
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
285+
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
297286
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
298287
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
299288
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {

llvm/lib/Transforms/Vectorize/VPlanAnalysis.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ namespace llvm {
1818

1919
class LLVMContext;
2020
class VPValue;
21-
class VPBlendRecipe;
2221
class VPInstruction;
2322
class VPWidenRecipe;
2423
class VPWidenCallRecipe;
@@ -48,7 +47,6 @@ class VPTypeAnalysis {
4847
Type *CanonicalIVTy;
4948
LLVMContext &Ctx;
5049

51-
Type *inferScalarTypeForRecipe(const VPBlendRecipe *R);
5250
Type *inferScalarTypeForRecipe(const VPInstruction *R);
5351
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
5452
Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);

llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ class VPPredicator {
8484
/// block of the loop is set to True, or to the loop mask when tail folding.
8585
VPValue *createBlockInMask(VPBasicBlock *VPBB);
8686

87-
/// Convert phi recipes in \p VPBB to VPBlendRecipes.
88-
void convertPhisToBlends(VPBasicBlock *VPBB);
87+
/// Convert phi recipes in \p VPBB to selects.
88+
void convertPhisToSelects(VPBasicBlock *VPBB);
8989

9090
const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
9191
};
@@ -247,7 +247,7 @@ VPValue *VPPredicator::findCommonEdgeMask(const VPPhi *PhiR) const {
247247
return CommonEdgeMask;
248248
}
249249

250-
void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
250+
void VPPredicator::convertPhisToSelects(VPBasicBlock *VPBB) {
251251
SmallVector<VPPhi *> Phis;
252252
for (VPRecipeBase &R : VPBB->phis())
253253
Phis.push_back(cast<VPPhi>(&R));
@@ -259,8 +259,10 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
259259
// optimizations will clean it up.
260260

261261
VPValue *CommonEdgeMask = findCommonEdgeMask(PhiR);
262+
VPValue *Select = PhiR->getIncomingValue(0);
262263
SmallVector<VPValue *, 2> OperandsWithMask;
263-
for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
264+
for (const auto &[InVPV, InVPBB] :
265+
drop_begin(PhiR->incoming_values_and_blocks())) {
264266
OperandsWithMask.push_back(InVPV);
265267
VPValue *EdgeMask = getEdgeMask(InVPBB, VPBB);
266268
if (!EdgeMask) {
@@ -277,13 +279,11 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
277279
EdgeMask = X;
278280
}
279281

280-
OperandsWithMask.push_back(EdgeMask);
282+
Select =
283+
Builder.createSelect(EdgeMask, InVPV, Select, PhiR->getDebugLoc());
284+
Select->setUnderlyingValue(PhiR->getUnderlyingValue());
281285
}
282-
PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
283-
auto *Blend =
284-
new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
285-
Builder.insert(Blend);
286-
PhiR->replaceAllUsesWith(Blend);
286+
PhiR->replaceAllUsesWith(Select);
287287
PhiR->eraseFromParent();
288288
}
289289
}
@@ -309,7 +309,7 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
309309
}
310310

311311
Predicator.createBlockInMask(VPBB);
312-
Predicator.convertPhisToBlends(VPBB);
312+
Predicator.convertPhisToSelects(VPBB);
313313
}
314314

315315
// Linearize the blocks of the loop into one serial chain.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
7373
case VPScalarIVStepsSC:
7474
case VPPredInstPHISC:
7575
return false;
76-
case VPBlendSC:
7776
case VPReductionEVLSC:
7877
case VPReductionSC:
7978
case VPVectorPointerSC:
@@ -123,7 +122,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
123122
case VPWidenStoreEVLSC:
124123
case VPWidenStoreSC:
125124
return false;
126-
case VPBlendSC:
127125
case VPReductionEVLSC:
128126
case VPReductionSC:
129127
case VPVectorPointerSC:
@@ -163,7 +161,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
163161
}
164162
case VPWidenIntrinsicSC:
165163
return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
166-
case VPBlendSC:
167164
case VPReductionEVLSC:
168165
case VPReductionSC:
169166
case VPScalarIVStepsSC:
@@ -2581,44 +2578,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
25812578
}
25822579
#endif
25832580

2584-
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
2585-
VPCostContext &Ctx) const {
2586-
// Handle cases where only the first lane is used the same way as the legacy
2587-
// cost model.
2588-
if (vputils::onlyFirstLaneUsed(this))
2589-
return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
2590-
2591-
Type *ResultTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
2592-
Type *CmpTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
2593-
return (getNumIncomingValues() - 1) *
2594-
Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ResultTy, CmpTy,
2595-
CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
2596-
}
2597-
2598-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2599-
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
2600-
VPSlotTracker &SlotTracker) const {
2601-
O << Indent << "BLEND ";
2602-
printAsOperand(O, SlotTracker);
2603-
O << " =";
2604-
if (getNumIncomingValues() == 1) {
2605-
// Not a User of any mask: not really blending, this is a
2606-
// single-predecessor phi.
2607-
O << " ";
2608-
getIncomingValue(0)->printAsOperand(O, SlotTracker);
2609-
} else {
2610-
for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
2611-
O << " ";
2612-
getIncomingValue(I)->printAsOperand(O, SlotTracker);
2613-
if (I == 0)
2614-
continue;
2615-
O << "/";
2616-
getMask(I)->printAsOperand(O, SlotTracker);
2617-
}
2618-
}
2619-
}
2620-
#endif
2621-
26222581
void VPReductionRecipe::execute(VPTransformState &State) {
26232582
assert(!State.Lane && "Reduction being replicated.");
26242583
Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);

0 commit comments

Comments
 (0)