Skip to content

[LV] Decompose WidenIntOrFPInduction into phi and update recipes #82021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Analysis/IVDescriptors.h
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,11 @@ class InductionDescriptor {
return nullptr;
}

/// Const overload of getExactFPMathInst(): forwards to the non-const overload
/// on this same object and hands the result back as a pointer-to-const.
const Instruction *getExactFPMathInst() const {
  // Strip constness from `this` only to select the non-const overload; the
  // Instruction* it yields converts implicitly to const Instruction*.
  auto *MutableThis = const_cast<InductionDescriptor *>(this);
  return MutableThis->getExactFPMathInst();
}

/// Returns binary opcode of the induction operator.
Instruction::BinaryOps getInductionOpcode() const {
return InductionBinOp ? InductionBinOp->getOpcode()
Expand Down
125 changes: 89 additions & 36 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8130,34 +8130,6 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
return nullptr;
}

/// Try to turn the truncate \p I of an induction phi into a widened induction
/// recipe. Only 'trunc' is handled here because (a) FP conversions lose
/// precision, (b) sext/zext may wrap, and (c) other casts depend on pointer
/// size. Returns nullptr when the cost model does not consider \p I an
/// optimizable IV truncate for \p Range (which is clamped by the query).
VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
    TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) {
  // Per-VF predicate: is \p I a truncation of an induction variable that can
  // be optimized at this VF?
  auto CanOptimizeAtVF = [&](ElementCount VF) -> bool {
    return CM.isOptimizableIVTruncate(I, VF);
  };

  if (!LoopVectorizationPlanner::getDecisionAndClampRange(CanOptimizeAtVF,
                                                          Range))
    return nullptr;

  // The truncated value is the induction phi itself; build the recipe from
  // the phi's induction descriptor and its start value.
  auto *IndPhi = cast<PHINode>(I->getOperand(0));
  const InductionDescriptor &Desc =
      *Legal->getIntOrFpInductionDescriptor(IndPhi);
  VPValue *StartV = Plan.getVPValueOrAddLiveIn(Desc.getStartValue());
  return createWidenInductionRecipes(IndPhi, I, StartV, Desc, Plan,
                                     *PSE.getSE(), *OrigLoop, Range);
}

VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
ArrayRef<VPValue *> Operands,
VPlanPtr &Plan) {
Expand Down Expand Up @@ -8291,6 +8263,71 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
Range);
}

/// Create a VPWidenCastRecipe converting \p V from type \p From to type \p To.
/// Returns nullptr when both types are identical, i.e. no cast is needed.
/// The new recipe is returned without being inserted anywhere.
///
/// Opcode selection:
///   integer -> integer : Trunc when \p To is narrower, ZExt otherwise
///   other   -> integer : FPToUI
///   any     -> other   : UIToFP
/// NOTE(review): a non-integer \p From with a non-integer \p To also selects
/// UIToFP; callers are presumably expected to pass an integer \p From in that
/// case - confirm.
VPWidenCastRecipe *VPRecipeBuilder::createCast(VPValue *V, Type *From,
                                               Type *To) {
  if (From == To)
    return nullptr;

  auto SelectOpcode = [&]() -> Instruction::CastOps {
    if (!To->isIntegerTy())
      return Instruction::UIToFP;
    if (!From->isIntegerTy())
      return Instruction::FPToUI;
    // Both sides are integers: narrow or zero-extend.
    if (To->getPrimitiveSizeInBits() < From->getPrimitiveSizeInBits())
      return Instruction::Trunc;
    return Instruction::ZExt;
  };

  return new VPWidenCastRecipe(SelectOpcode(), V, To);
}

/// Build the recipes that advance the widened induction \p WIV by one vector
/// iteration and return the recipe producing the updated value.
///
/// The update is `WIV <op> (WidenVFxUF * Step)`, where:
///  - Step is the induction step of \p WIV expanded via SCEV into \p Plan,
///  - WidenVFxUF is \p Plan's WidenVFxUF value, cast to the step type when the
///    scalar type inferred for the trip count differs from it,
///  - <op> is the induction opcode of the descriptor, or Add when the
///    descriptor reports Instruction::BinaryOpsEnd.
/// Integer steps use Mul; all other step types use FMul, and both the FMul and
/// the update carry the fast-math flags of the descriptor's exact-FP-math
/// instruction (default flags if there is none).
///
/// Every recipe created here is inserted before the terminator of the exiting
/// block of \p Plan's vector loop region, in creation order.
///
/// \param CreatedRecipes optional; when non-null, each recipe this function
///        inserts (cast, multiply and update) is added to the set.
/// \returns the update recipe (not the multiplication).
VPRecipeBase *
VPRecipeBuilder::createWidenStep(VPWidenIntOrFpInductionRecipe &WIV,
                                 ScalarEvolution &SE, VPlan &Plan,
                                 DenseSet<VPRecipeBase *> *CreatedRecipes) {
  PHINode *PN = WIV.getPHINode();
  const InductionDescriptor &IndDesc = WIV.getInductionDescriptor();
  VPValue *ScalarStep =
      vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
  VPTypeAnalysis TypeInfo(nullptr, SE.getContext());
  Type *VFxUFTy = TypeInfo.inferScalarType(Plan.getTripCount());
  Type *StepTy = IndDesc.getStep()->getType();
  VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();

  // Single place that inserts a new recipe in front of the latch terminator
  // and records it, so the integer and FP paths cannot drift apart.
  auto InsertAndRecord = [&](VPRecipeBase *R) {
    R->insertBefore(LatchVPBB->getTerminator());
    if (CreatedRecipes)
      CreatedRecipes->insert(R);
  };

  // Bring WidenVFxUF to the step type if it is not already of that type.
  VPValue *WidenVFxUF = &Plan.getWidenVFxUF();
  if (VPWidenCastRecipe *WidenVFxUFCast =
          createCast(WidenVFxUF, VFxUFTy, StepTy)) {
    InsertAndRecord(WidenVFxUFCast);
    WidenVFxUF = WidenVFxUFCast->getVPSingleValue();
  }

  const Instruction::BinaryOps UpdateOp =
      IndDesc.getInductionOpcode() != Instruction::BinaryOpsEnd
          ? IndDesc.getInductionOpcode()
          : Instruction::Add;

  VPInstruction *Mul;
  VPInstruction *Update;
  if (StepTy->isIntegerTy()) {
    Mul = new VPInstruction(Instruction::Mul, {WidenVFxUF, ScalarStep},
                            PN->getDebugLoc());
    Update = new VPInstruction(UpdateOp, {&WIV, Mul}, PN->getDebugLoc());
  } else {
    const Instruction *ExactFPMathInst = IndDesc.getExactFPMathInst();
    FastMathFlags FMF =
        ExactFPMathInst ? ExactFPMathInst->getFastMathFlags() : FastMathFlags();
    Mul = new VPInstruction(Instruction::FMul, {WidenVFxUF, ScalarStep}, FMF,
                            PN->getDebugLoc());
    Update = new VPInstruction(UpdateOp, {&WIV, Mul}, FMF, PN->getDebugLoc());
  }

  // Previously the FP multiply was inserted but never added to
  // CreatedRecipes; both paths now record the multiply and the update.
  InsertAndRecord(Mul);
  InsertAndRecord(Update);
  return Update;
}

VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
ArrayRef<VPValue *> Operands,
VPBasicBlock *VPBB, VPlanPtr &Plan) {
Expand Down Expand Up @@ -8340,10 +8377,15 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
};
}

void VPRecipeBuilder::fixHeaderPhis() {
void VPRecipeBuilder::fixHeaderPhis(VPlan &Plan) {
BasicBlock *OrigLatch = OrigLoop->getLoopLatch();
for (VPHeaderPHIRecipe *R : PhisToFix) {
auto *PN = cast<PHINode>(R->getUnderlyingValue());
if (auto *VPWIFR = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
VPWIFR->addOperand(
createWidenStep(*VPWIFR, *PSE.getSE(), Plan)->getVPSingleValue());
continue;
}
PHINode *PN = cast<PHINode>(R->getUnderlyingValue());
VPRecipeBase *IncR =
getRecipe(cast<Instruction>(PN->getIncomingValueForBlock(OrigLatch)));
R->addOperand(IncR->getVPSingleValue());
Expand Down Expand Up @@ -8421,8 +8463,12 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
// can have earlier phis as incoming values.
recordRecipeOf(Phi);

if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range)))
if ((Recipe = tryToOptimizeInductionPHI(Phi, Operands, *Plan, Range))) {
if (isa<VPWidenPointerInductionRecipe>(Recipe))
return Recipe;
PhisToFix.push_back(cast<VPWidenIntOrFpInductionRecipe>(Recipe));
return Recipe;
}

VPHeaderPHIRecipe *PhiRecipe = nullptr;
assert((Legal->isReductionVariable(Phi) ||
Expand Down Expand Up @@ -8457,10 +8503,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
return PhiRecipe;
}

if (isa<TruncInst>(Instr) &&
(Recipe = tryToOptimizeInductionTruncate(cast<TruncInst>(Instr), Operands,
Range, *Plan)))
return Recipe;
if (isa<TruncInst>(Instr)) {
auto IsOptimizableIVTruncate =
[&](Instruction *K) -> std::function<bool(ElementCount)> {
return [=](ElementCount VF) -> bool {
return CM.isOptimizableIVTruncate(K, VF);
};
};

LoopVectorizationPlanner::getDecisionAndClampRange(
IsOptimizableIVTruncate(Instr), Range);
}

// All widen recipes below deal only with VF > 1.
if (LoopVectorizationPlanner::getDecisionAndClampRange(
Expand Down Expand Up @@ -8718,7 +8771,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
!Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
"entry block must be set to a VPRegionBlock having a non-empty entry "
"VPBasicBlock");
RecipeBuilder.fixHeaderPhis();
RecipeBuilder.fixHeaderPhis(*Plan);

// ---------------------------------------------------------------------------
// Transform initial VPlan: Apply previously taken decisions, in order, to
Expand Down
14 changes: 13 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,18 @@ class VPRecipeBuilder {
/// between SRC and DST.
VPValue *getEdgeMask(BasicBlock *Src, BasicBlock *Dst) const;

/// A helper function to create a VPWidenCastRecipe casting the VPValue \p V
/// from type \p From to type \p To. Returns nullptr when \p From and \p To
/// are the same type, i.e. when no cast is required. Otherwise the cast is
/// Trunc or ZExt between two integer types (by bit width), FPToUI when only
/// \p To is an integer type, and UIToFP in all remaining cases. The returned
/// recipe has not been inserted into any block.
/// FIXME: Remove \p From argument and take it from a \p V value
static VPWidenCastRecipe *createCast(VPValue *V, Type *From, Type *To);

/// A helper function which multiplies the step of \p WIV by WidenVFxUF,
/// applies the induction update to \p WIV with that product, and inserts the
/// new recipes before the terminator of the exiting block of the vector loop
/// region of \p Plan. Returns the recipe computing the updated induction
/// value (the update, not the multiplication). When \p CreatedRecipes is
/// non-null, newly created recipes are recorded in it (see the definition for
/// exactly which ones).
static VPRecipeBase *
createWidenStep(VPWidenIntOrFpInductionRecipe &WIV, ScalarEvolution &SE,
VPlan &Plan,
DenseSet<VPRecipeBase *> *CreatedRecipes = nullptr);

/// Mark given ingredient for recording its recipe once one is created for
/// it.
void recordRecipeOf(Instruction *I) {
Expand All @@ -171,7 +183,7 @@ class VPRecipeBuilder {

/// Add the incoming values from the backedge to reduction & first-order
/// recurrence cross-iteration phis.
void fixHeaderPhis();
void fixHeaderPhis(VPlan &Plan);
};
} // end namespace llvm

Expand Down
54 changes: 38 additions & 16 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,12 @@ Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder,
llvm_unreachable("Unknown lane kind");
}

VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
: SubclassID(SC), UnderlyingVal(UV), Def(Def) {
/// Construct a VPValue with subclass id \p SC, optional underlying IR value
/// \p UV, optional defining recipe \p Def and optional explicit type \p Ty.
/// When both \p Ty and \p UV are given, the type of \p UV must equal \p Ty
/// (checked by assertion). A non-null \p Def is told about this value via
/// addDefinedValue().
VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def, Type *Ty)
    : SubclassID(SC), UnderlyingVal(UV), UnderlyingTy(Ty), Def(Def) {
  // An explicit type is only consistent if there is no underlying value or
  // the underlying value has exactly that type.
  assert((!UnderlyingTy || !UnderlyingVal ||
          UnderlyingVal->getType() == UnderlyingTy) &&
         "VPValue with set type should either be created without underlying "
         "value or type should match the given type");
  if (!Def)
    return;
  Def->addDefinedValue(this);
}
Expand Down Expand Up @@ -808,6 +812,19 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
VFxUF.setUnderlyingValue(
createStepForVF(Builder, TripCountV->getType(), State.VF, State.UF));

if (WidenVFxUF.getNumUsers() > 0) {
if (State.VF.isScalar())
WidenVFxUF.setUnderlyingValue(VFxUF.getUnderlyingValue());
else
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
Value *Step =
createStepForVF(Builder, TripCountV->getType(), State.VF, Part + 1);
State.set(&WidenVFxUF,
Builder.CreateVectorSplat(State.VF, Step, "widen.vfxuf"),
Part);
}
}

// When vectorizing the epilogue loop, the canonical induction start value
// needs to be changed from zero to the value after the main vector loop.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
Expand Down Expand Up @@ -853,21 +870,16 @@ void VPlan::execute(VPTransformState *State) {
if (isa<VPWidenPHIRecipe>(&R))
continue;

if (isa<VPWidenPointerInductionRecipe>(&R) ||
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
if (isa<VPWidenPointerInductionRecipe>(&R)) {
PHINode *Phi = nullptr;
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
Phi = cast<PHINode>(State->get(R.getVPSingleValue(), 0));
} else {
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
// TODO: Split off the case that all users of a pointer phi are scalar
// from the VPWidenPointerInductionRecipe.
if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
continue;

auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
Phi = cast<PHINode>(GEP->getPointerOperand());
}
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
// TODO: Split off the case that all users of a pointer phi are scalar
// from the VPWidenPointerInductionRecipe.
if (WidenPhi->onlyScalarsGenerated(State->VF.isScalable()))
continue;

auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi, 0));
Phi = cast<PHINode>(GEP->getPointerOperand());

Phi->setIncomingBlock(1, VectorLatchBB);

Expand All @@ -885,6 +897,7 @@ void VPlan::execute(VPTransformState *State) {
// generated.
bool SinglePartNeeded = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
isa<VPFirstOrderRecurrencePHIRecipe>(PhiR) ||
isa<VPWidenIntOrFpInductionRecipe>(PhiR) ||
(isa<VPReductionPHIRecipe>(PhiR) &&
cast<VPReductionPHIRecipe>(PhiR)->isOrdered());
bool NeedsScalar = isa<VPCanonicalIVPHIRecipe>(PhiR) ||
Expand Down Expand Up @@ -920,6 +933,12 @@ void VPlan::printLiveIns(raw_ostream &O) const {
O << " = VF * UF";
}

if (WidenVFxUF.getNumUsers() > 0) {
O << "\nLive-in ";
WidenVFxUF.printAsOperand(O, SlotTracker);
O << " = WIDEN VF * UF";
}

if (VectorTripCount.getNumUsers() > 0) {
O << "\nLive-in ";
VectorTripCount.printAsOperand(O, SlotTracker);
Expand Down Expand Up @@ -1095,6 +1114,7 @@ VPlan *VPlan::duplicate() {
}
Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount;
Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF;
Old2NewVPValues[&WidenVFxUF] = &NewPlan->WidenVFxUF;
if (BackedgeTakenCount) {
NewPlan->BackedgeTakenCount = new VPValue();
Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount;
Expand Down Expand Up @@ -1391,6 +1411,8 @@ void VPSlotTracker::assignSlot(const VPValue *V) {
void VPSlotTracker::assignSlots(const VPlan &Plan) {
if (Plan.VFxUF.getNumUsers() > 0)
assignSlot(&Plan.VFxUF);
if (Plan.WidenVFxUF.getNumUsers() > 0)
assignSlot(&Plan.WidenVFxUF);
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
assignSlot(Plan.BackedgeTakenCount);
Expand Down
Loading