Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 19 additions & 15 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4081,7 +4081,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
case VPDef::VPWidenSelectSC:
case VPDef::VPBlendSC:
case VPDef::VPFirstOrderRecurrencePHISC:
case VPDef::VPHistogramSC:
case VPDef::VPWidenPHISC:
Expand Down Expand Up @@ -4203,10 +4202,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
if (!VPI)
continue;
switch (VPI->getOpcode()) {
// Selects are only modelled in the legacy cost model for safe
// divisors.
case Instruction::Select: {
VPValue *VPV = VPI->getVPSingleValue();
// Blend selects are modelled in VPlan.
if (isa_and_nonnull<PHINode>(VPV->getUnderlyingValue()))
continue;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
continue;
continue;

// Selects are only modelled in the legacy cost model for safe
// divisors.
if (VPV->getNumUsers() == 1) {
if (auto *WR = dyn_cast<VPWidenRecipe>(*VPV->user_begin())) {
switch (WR->getOpcode()) {
Expand Down Expand Up @@ -8656,9 +8658,11 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe>(&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue) ||
(match(&R, m_VPInstruction<Instruction::Select>(
m_VPValue(), m_VPValue(), m_VPValue())) &&
Comment on lines +8663 to +8664
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be good to introduce an empty variant of the select matcher?

isa_and_nonnull<PHINode>(UnderlyingValue)))
continue;

// FIXME: VPlan0, which models a copy of the original scalar loop, should
Expand Down Expand Up @@ -8944,20 +8948,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
// the phi until LoopExitValue. We keep track of the previous item
// (PreviousLink) to tell which of the two operands of a Link will remain
// scalar and which will be reduced. For minmax by select(cmp), Link will be
// the select instructions. Blend recipes of in-loop reduction phi's will
// the select instructions. Blend selects of in-loop reduction phi's will
// get folded to their non-phi operand, as the reduction recipe handles the
// condition directly.
VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
assert(Blend->getNumIncomingValues() == 2 &&
"Blend must have 2 incoming values");
if (Blend->getIncomingValue(0) == PhiR) {
Blend->replaceAllUsesWith(Blend->getIncomingValue(1));
using namespace VPlanPatternMatch;
VPValue *T, *F;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
VPValue *T, *F;
const VPValue *T, *F;

if (match(CurrentLink, m_VPInstruction<Instruction::Select>(
m_VPValue(), m_VPValue(T), m_VPValue(F)))) {
if (T == PhiR) {
CurrentLink->replaceAllUsesWith(F);
} else {
assert(Blend->getIncomingValue(1) == PhiR &&
"PhiR must be an operand of the blend");
Blend->replaceAllUsesWith(Blend->getIncomingValue(0));
assert(F == PhiR && "PhiR must be an operand of the select");
CurrentLink->replaceAllUsesWith(T);
}
Comment on lines +8960 to 8965
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CurrentLink->replaceAllUsesWith(T == PhiR ? F : T);

continue;
}
Expand Down
67 changes: 0 additions & 67 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
case VPRecipeBase::VPCanonicalIVPHISC:
case VPRecipeBase::VPActiveLaneMaskPHISC:
Expand Down Expand Up @@ -2369,72 +2368,6 @@ class VPReductionPHIRecipe : public VPHeaderPHIRecipe,
}
};

/// A recipe for vectorizing a phi-node as a sequence of mask-based select
/// instructions.
class LLVM_ABI_FOR_TEST VPBlendRecipe : public VPSingleDefRecipe {
public:
/// The blend operation is a User of the incoming values and of their
/// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
/// be omitted (implied by passing an odd number of operands) in which case
/// all other incoming values are merged into it.
/// \p Phi and \p DL are forwarded to the VPSingleDefRecipe base together
/// with \p Operands; at least one operand is required.
VPBlendRecipe(PHINode *Phi, ArrayRef<VPValue *> Operands, DebugLoc DL)
: VPSingleDefRecipe(VPDef::VPBlendSC, Operands, Phi, DL) {
assert(Operands.size() > 0 && "Expected at least one operand!");
}

/// Create a new VPBlendRecipe with the same operands and debug location as
/// this one. The underlying value, if any, is passed on as a PHINode
/// (cast_or_null, so a missing underlying value is tolerated).
VPBlendRecipe *clone() override {
return new VPBlendRecipe(cast_or_null<PHINode>(getUnderlyingValue()),
operands(), getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPBlendSC)

/// A normalized blend is one that has an odd number of operands, whereby the
/// first operand does not have an associated mask.
bool isNormalized() const { return getNumOperands() % 2; }

/// Return the number of incoming values, taking into account when normalized
/// the first incoming value will have no mask.
unsigned getNumIncomingValues() const {
// Rounds up for the odd (normalized) operand count, where I0 has no mask.
return (getNumOperands() + isNormalized()) / 2;
}

/// Return incoming value number \p Idx.
VPValue *getIncomingValue(unsigned Idx) const {
// I0 is always operand 0. Later values sit at every other operand, shifted
// down by one when the blend is normalized (M0 is absent).
return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
}

/// Return mask number \p Idx. Mask 0 only exists for blends that are not
/// normalized; asking for it on a normalized blend trips the assertion.
VPValue *getMask(unsigned Idx) const {
assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
}

/// Blend recipes are not executed directly; reaching this is treated as
/// unreachable because they are expected to be expanded by simplifyBlends.
void execute(VPTransformState &State) override {
llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
}

/// Return the cost of this VPBlendRecipe.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// Recursing through Blend recipes only, must terminate at header phis at
// the latest. The answer is true only if every user of this blend uses
// just its first lane.
return all_of(users(),
[this](VPUser *U) { return U->onlyFirstLaneUsed(this); });
}
};

/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
/// or stores into one wide load/store and shuffles. The first operand of a
/// VPInterleave recipe is the address, followed by the stored values, followed
Expand Down
13 changes: 1 addition & 12 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,6 @@ VPTypeAnalysis::VPTypeAnalysis(const VPlan &Plan) : Ctx(Plan.getContext()) {
CanonicalIVTy = cast<VPExpandSCEVRecipe>(TC)->getSCEV()->getType();
}

/// Infer the scalar type produced by blend recipe \p R.
///
/// The result type is the inferred type of the first incoming value. Every
/// other incoming value is asserted to have that same type (in builds with
/// assertions enabled) and has that type recorded in CachedTypes.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPBlendRecipe *R) {
  Type *BlendTy = inferScalarType(R->getIncomingValue(0));
  const unsigned NumIncoming = R->getNumIncomingValues();
  for (unsigned Idx = 1; Idx != NumIncoming; ++Idx) {
    VPValue *Incoming = R->getIncomingValue(Idx);
    assert(inferScalarType(Incoming) == BlendTy &&
           "different types inferred for different incoming values");
    // Cache the type for the remaining incoming values as well.
    CachedTypes[Incoming] = BlendTy;
  }
  return BlendTy;
}

Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
// Set the result type from the first operand, check if the types for all
// other operands match and cache them.
Expand Down Expand Up @@ -293,7 +282,7 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
.Case<VPInstructionWithType, VPWidenIntrinsicRecipe,
VPWidenCastRecipe>(
[](const auto *R) { return R->getResultType(); })
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ namespace llvm {

class LLVMContext;
class VPValue;
class VPBlendRecipe;
class VPInstruction;
class VPWidenRecipe;
class VPWidenCallRecipe;
Expand Down Expand Up @@ -48,7 +47,6 @@ class VPTypeAnalysis {
Type *CanonicalIVTy;
LLVMContext &Ctx;

Type *inferScalarTypeForRecipe(const VPBlendRecipe *R);
Type *inferScalarTypeForRecipe(const VPInstruction *R);
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
Expand Down
22 changes: 11 additions & 11 deletions llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,8 @@ class VPPredicator {
/// block of the loop is set to True, or to the loop mask when tail folding.
VPValue *createBlockInMask(VPBasicBlock *VPBB);

/// Convert phi recipes in \p VPBB to VPBlendRecipes.
void convertPhisToBlends(VPBasicBlock *VPBB);
/// Convert phi recipes in \p VPBB to selects.
void convertPhisToSelects(VPBasicBlock *VPBB);

const BlockMaskCacheTy getBlockMaskCache() const { return BlockMaskCache; }
};
Expand Down Expand Up @@ -247,7 +247,7 @@ VPValue *VPPredicator::findCommonEdgeMask(const VPPhi *PhiR) const {
return CommonEdgeMask;
}

void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
void VPPredicator::convertPhisToSelects(VPBasicBlock *VPBB) {
SmallVector<VPPhi *> Phis;
for (VPRecipeBase &R : VPBB->phis())
Phis.push_back(cast<VPPhi>(&R));
Expand All @@ -259,8 +259,10 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
// optimizations will clean it up.

VPValue *CommonEdgeMask = findCommonEdgeMask(PhiR);
VPValue *Select = PhiR->getIncomingValue(0);
SmallVector<VPValue *, 2> OperandsWithMask;
for (const auto &[InVPV, InVPBB] : PhiR->incoming_values_and_blocks()) {
for (const auto &[InVPV, InVPBB] :
drop_begin(PhiR->incoming_values_and_blocks())) {
OperandsWithMask.push_back(InVPV);
VPValue *EdgeMask = getEdgeMask(InVPBB, VPBB);
if (!EdgeMask) {
Expand All @@ -277,13 +279,11 @@ void VPPredicator::convertPhisToBlends(VPBasicBlock *VPBB) {
EdgeMask = X;
}

OperandsWithMask.push_back(EdgeMask);
Select =
Builder.createSelect(EdgeMask, InVPV, Select, PhiR->getDebugLoc());
Select->setUnderlyingValue(PhiR->getUnderlyingValue());
}
PHINode *IRPhi = cast_or_null<PHINode>(PhiR->getUnderlyingValue());
auto *Blend =
new VPBlendRecipe(IRPhi, OperandsWithMask, PhiR->getDebugLoc());
Builder.insert(Blend);
PhiR->replaceAllUsesWith(Blend);
PhiR->replaceAllUsesWith(Select);
PhiR->eraseFromParent();
}
}
Expand All @@ -309,7 +309,7 @@ VPlanTransforms::introduceMasksAndLinearize(VPlan &Plan, bool FoldTail) {
}

Predicator.createBlockInMask(VPBB);
Predicator.convertPhisToBlends(VPBB);
Predicator.convertPhisToSelects(VPBB);
}

// Linearize the blocks of the loop into one serial chain.
Expand Down
41 changes: 0 additions & 41 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPScalarIVStepsSC:
case VPPredInstPHISC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
case VPReductionSC:
case VPVectorPointerSC:
Expand Down Expand Up @@ -123,7 +122,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenStoreEVLSC:
case VPWidenStoreSC:
return false;
case VPBlendSC:
case VPReductionEVLSC:
case VPReductionSC:
case VPVectorPointerSC:
Expand Down Expand Up @@ -163,7 +161,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
}
case VPWidenIntrinsicSC:
return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();
case VPBlendSC:
case VPReductionEVLSC:
case VPReductionSC:
case VPScalarIVStepsSC:
Expand Down Expand Up @@ -2581,44 +2578,6 @@ void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif

/// Compute the cost of this blend for vectorization factor \p VF.
///
/// If only the first lane of the blend is used, the cost of a PHI is
/// returned, matching the legacy cost model. Otherwise the blend is costed
/// as one select per incoming value beyond the first.
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  if (vputils::onlyFirstLaneUsed(this))
    return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);

  // Vector types of the blended value and of the i1 select condition.
  Type *BlendVecTy = toVectorTy(Ctx.Types.inferScalarType(this), VF);
  Type *MaskVecTy = toVectorTy(Type::getInt1Ty(Ctx.Types.getContext()), VF);
  InstructionCost PerSelectCost = Ctx.TTI.getCmpSelInstrCost(
      Instruction::Select, BlendVecTy, MaskVecTy, CmpInst::BAD_ICMP_PREDICATE,
      Ctx.CostKind);

  return (getNumIncomingValues() - 1) * PerSelectCost;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the blend as "BLEND <def> = <I0> <I1>/<M1> <I2>/<M2> ...".
/// With a single incoming value only that value is printed, without a mask.
void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";

  const unsigned NumIncoming = getNumIncomingValues();
  if (NumIncoming == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    O << " ";
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
    return;
  }

  for (unsigned Idx = 0; Idx < NumIncoming; ++Idx) {
    O << " ";
    getIncomingValue(Idx)->printAsOperand(O, SlotTracker);
    // The first incoming value is printed without a mask.
    if (Idx != 0) {
      O << "/";
      getMask(Idx)->printAsOperand(O, SlotTracker);
    }
  }
}
#endif

void VPReductionRecipe::execute(VPTransformState &State) {
assert(!State.Lane && "Reduction being replicated.");
Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
Expand Down
Loading
Loading