Skip to content

Commit c274eea

Browse files
committed
[VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI).
!fixup update more tests.
1 parent cfde685 commit c274eea

12 files changed

+295
-265
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+21-10
Original file line numberDiff line numberDiff line change
@@ -2760,6 +2760,15 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
27602760
return TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
27612761
}
27622762

2763+
static VPBasicBlock *getHeaderForMainVectorLoop(VPlan &Plan,
2764+
VPDominatorTree &VPDT) {
2765+
return find_singleton<VPBasicBlock>(
2766+
vp_depth_first_shallow(Plan.getEntry()), [&VPDT](VPBlockBase *VPB, bool) {
2767+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
2768+
return VPBB && VPBB->isHeader(VPDT) ? VPBB : nullptr;
2769+
});
2770+
}
2771+
27632772
void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
27642773
// Fix widened non-induction PHIs by setting up the PHI operands.
27652774
if (EnableVPlanNativePath)
@@ -2778,13 +2787,13 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
27782787
PSE.getSE()->forgetLoop(OrigLoop);
27792788
PSE.getSE()->forgetBlockAndLoopDispositions();
27802789

2781-
// Don't apply optimizations below when no vector region remains, as they all
2782-
// require a vector loop at the moment.
2783-
if (!State.Plan->getVectorLoopRegion())
2790+
// Don't apply optimizations below when no vector loop remains, as they all
2791+
// require one at the moment.
2792+
VPBasicBlock *HeaderVPBB =
2793+
getHeaderForMainVectorLoop(*State.Plan, State.VPDT);
2794+
if (!HeaderVPBB)
27842795
return;
27852796

2786-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
2787-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
27882797
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
27892798

27902799
// Remove redundant induction instructions.
@@ -2809,7 +2818,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28092818
}
28102819

28112820
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
2812-
auto Iter = vp_depth_first_deep(Plan.getEntry());
2821+
auto Iter = vp_depth_first_shallow(Plan.getEntry());
28132822
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
28142823
for (VPRecipeBase &P : VPBB->phis()) {
28152824
VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
@@ -7799,6 +7808,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77997808
BestVPlan, BestVF,
78007809
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
78017810
VPlanTransforms::removeDeadRecipes(BestVPlan);
7811+
7812+
VPBasicBlock *MiddleVPBB =
7813+
BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr;
78027814
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
78037815
*Legal->getWidestInductionType());
78047816

@@ -7894,14 +7906,14 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78947906
// 2.6. Maintain Loop Hints
78957907
// Keep all loop hints from the original loop on the vector loop (we'll
78967908
// replace the vectorizer-specific hints below).
7897-
if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
7909+
VPBasicBlock *HeaderVPBB = getHeaderForMainVectorLoop(BestVPlan, State.VPDT);
7910+
if (HeaderVPBB) {
78987911
MDNode *OrigLoopID = OrigLoop->getLoopID();
78997912

79007913
std::optional<MDNode *> VectorizedLoopID =
79017914
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
79027915
LLVMLoopVectorizeFollowupVectorized});
79037916

7904-
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
79057917
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
79067918
if (VectorizedLoopID) {
79077919
L->setLoopID(*VectorizedLoopID);
@@ -7947,8 +7959,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79477959
ILV.printDebugTracesAtEnd();
79487960

79497961
// 4. Adjust branch weight of the branch in the middle block.
7950-
if (BestVPlan.getVectorLoopRegion()) {
7951-
auto *MiddleVPBB = BestVPlan.getMiddleBlock();
7962+
if (HeaderVPBB) {
79527963
auto *MiddleTerm =
79537964
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());
79547965
if (MiddleTerm->isConditional() &&

llvm/lib/Transforms/Vectorize/VPlan.cpp

+108-83
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207207
return Parent->getEnclosingBlockWithPredecessors();
208208
}
209209

210+
bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
211+
return getPredecessors().size() == 2 &&
212+
VPDT.dominates(this, getPredecessors()[1]);
213+
}
214+
210215
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
211216
iterator It = begin();
212217
while (It != end() && It->isPhi())
@@ -424,14 +429,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
424429
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
425430
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
426431
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
427-
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
432+
BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
433+
if (!PredBB)
434+
continue;
428435

429436
assert(PredBB && "Predecessor basic-block not found building successor.");
430437
auto *PredBBTerminator = PredBB->getTerminator();
431438
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
432439

433440
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
434441
if (isa<UnreachableInst>(PredBBTerminator)) {
442+
if (PredVPSuccessors.size() == 2)
443+
continue;
435444
assert(PredVPSuccessors.size() == 1 &&
436445
"Predecessor ending w/o branch must have single successor.");
437446
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -487,11 +496,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
487496
bool Replica = bool(State->Lane);
488497
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
489498

499+
if (isHeader(State->VPDT)) {
500+
// Create and register the new vector loop.
501+
Loop *PrevParentLoop = State->CurrentParentLoop;
502+
State->CurrentParentLoop = State->LI->AllocateLoop();
503+
504+
// Insert the new loop into the loop nest and register the new basic blocks
505+
// before calling any utilities such as SCEV that require valid LoopInfo.
506+
if (PrevParentLoop)
507+
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
508+
else
509+
State->LI->addTopLevelLoop(State->CurrentParentLoop);
510+
}
511+
490512
auto IsReplicateRegion = [](VPBlockBase *BB) {
491513
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
492-
return R && R->isReplicator();
514+
assert((!R || R->isReplicator()) &&
515+
"only replicate region blocks should remain");
516+
return R;
493517
};
494-
495518
// 1. Create an IR basic block.
496519
if ((Replica && this == getParent()->getEntry()) ||
497520
IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -514,6 +537,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
514537

515538
// 2. Fill the IR basic block with IR instructions.
516539
executeRecipes(State, NewBB);
540+
541+
// If this block is a latch (one of its successors is a header that dominates it), switch CurrentParentLoop to its parent loop.
542+
if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
543+
auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
544+
return VPBB && VPBB->isHeader(State->VPDT) &&
545+
State->VPDT.dominates(Succ, this);
546+
}))
547+
State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
517548
}
518549

519550
VPBasicBlock *VPBasicBlock::clone() {
@@ -725,35 +756,13 @@ VPRegionBlock *VPRegionBlock::clone() {
725756
}
726757

727758
void VPRegionBlock::execute(VPTransformState *State) {
728-
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
729-
RPOT(Entry);
730-
731-
if (!isReplicator()) {
732-
// Create and register the new vector loop.
733-
Loop *PrevParentLoop = State->CurrentParentLoop;
734-
State->CurrentParentLoop = State->LI->AllocateLoop();
735-
736-
// Insert the new loop into the loop nest and register the new basic blocks
737-
// before calling any utilities such as SCEV that require valid LoopInfo.
738-
if (PrevParentLoop)
739-
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
740-
else
741-
State->LI->addTopLevelLoop(State->CurrentParentLoop);
742-
743-
// Visit the VPBlocks connected to "this", starting from it.
744-
for (VPBlockBase *Block : RPOT) {
745-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
746-
Block->execute(State);
747-
}
748-
749-
State->CurrentParentLoop = PrevParentLoop;
750-
return;
751-
}
752-
759+
assert(isReplicator() &&
760+
"Loop regions should have been lowered to plain CFG");
753761
assert(!State->Lane && "Replicating a Region with non-null instance.");
754-
755-
// Enter replicating mode.
756762
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
763+
764+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
765+
Entry);
757766
State->Lane = VPLane(0);
758767
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
759768
++Lane) {
@@ -847,6 +856,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
847856
}
848857
#endif
849858

859+
void VPRegionBlock::removeRegion() {
860+
auto *Header = cast<VPBasicBlock>(getEntry());
861+
VPBlockBase *Preheader = getSinglePredecessor();
862+
auto *Exiting = cast<VPBasicBlock>(getExiting());
863+
864+
VPBlockBase *Middle = getSingleSuccessor();
865+
VPBlockUtils::disconnectBlocks(Preheader, this);
866+
VPBlockUtils::disconnectBlocks(this, Middle);
867+
868+
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
869+
VPB->setParent(nullptr);
870+
871+
VPBlockUtils::connectBlocks(Preheader, Header);
872+
VPBlockUtils::connectBlocks(Exiting, Middle);
873+
}
874+
850875
VPlan::VPlan(Loop *L) {
851876
setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
852877
ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -956,57 +981,57 @@ void VPlan::execute(VPTransformState *State) {
956981
for (VPBlockBase *Block : RPOT)
957982
Block->execute(State);
958983

959-
State->CFG.DTU.flush();
960-
961-
auto *LoopRegion = getVectorLoopRegion();
962-
if (!LoopRegion)
963-
return;
964-
965-
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
966-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
967-
968984
// Fix the latch value of canonical, reduction and first-order recurrences
969985
// phis in the vector loop.
970-
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
971-
for (VPRecipeBase &R : Header->phis()) {
972-
// Skip phi-like recipes that generate their backedge values themselves.
973-
if (isa<VPWidenPHIRecipe>(&R))
986+
for (VPBasicBlock *Header :
987+
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
988+
if (!Header->isHeader(State->VPDT))
974989
continue;
990+
for (VPRecipeBase &R : Header->phis()) {
991+
if (isa<VPWidenPHIRecipe>(&R))
992+
continue;
975993

976-
if (isa<VPWidenInductionRecipe>(&R)) {
977-
PHINode *Phi = nullptr;
978-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
979-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
980-
} else {
981-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
982-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
983-
"recipe generating only scalars should have been replaced");
984-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
985-
Phi = cast<PHINode>(GEP->getPointerOperand());
994+
auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
995+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
996+
997+
if (isa<VPWidenInductionRecipe>(&R)) {
998+
PHINode *Phi = nullptr;
999+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1000+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1001+
} else {
1002+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1003+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1004+
"recipe generating only scalars should have been replaced");
1005+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1006+
Phi = cast<PHINode>(GEP->getPointerOperand());
1007+
}
1008+
1009+
Phi->setIncomingBlock(1, VectorLatchBB);
1010+
1011+
// Move the last step to the end of the latch block. This ensures
1012+
// consistent placement of all induction updates.
1013+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1014+
Inc->moveBefore(
1015+
std::prev(VectorLatchBB->getTerminator()->getIterator()));
1016+
1017+
// Use the steps for the last part as backedge value for the induction.
1018+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1019+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1020+
continue;
9861021
}
9871022

988-
Phi->setIncomingBlock(1, VectorLatchBB);
989-
990-
// Move the last step to the end of the latch block. This ensures
991-
// consistent placement of all induction updates.
992-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
993-
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
994-
995-
// Use the steps for the last part as backedge value for the induction.
996-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
997-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
998-
continue;
1023+
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1024+
// VPInstructions currently model scalar Phis only.
1025+
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1026+
(isa<VPReductionPHIRecipe>(PhiR) &&
1027+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1028+
1029+
Value *Phi = State->get(PhiR, NeedsScalar);
1030+
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1031+
// not.
1032+
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1033+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
9991034
}
1000-
1001-
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1002-
// VPInstructions currently model scalar Phis only.
1003-
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1004-
(isa<VPReductionPHIRecipe>(PhiR) &&
1005-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1006-
Value *Phi = State->get(PhiR, NeedsScalar);
1007-
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1008-
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1009-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10101035
}
10111036
}
10121037

@@ -1365,16 +1390,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
13651390

13661391
#endif
13671392

1368-
/// Returns true if there is a vector loop region and \p VPV is defined in a
1369-
/// loop region.
1370-
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1371-
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1372-
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1373-
DefR->getParent()->getEnclosingLoopRegion());
1374-
}
1375-
13761393
bool VPValue::isDefinedOutsideLoopRegions() const {
1377-
return !isDefinedInsideLoopRegions(this);
1394+
auto *DefR = getDefiningRecipe();
1395+
if (!DefR)
1396+
return true;
1397+
1398+
const VPBasicBlock *DefVPBB = DefR->getParent();
1399+
auto *Plan = DefVPBB->getPlan();
1400+
if (Plan->getVectorLoopRegion())
1401+
return !DefR->getParent()->getEnclosingLoopRegion();
1402+
return DefVPBB == Plan->getEntry();
13781403
}
13791404
void VPValue::replaceAllUsesWith(VPValue *New) {
13801405
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });

llvm/lib/Transforms/Vectorize/VPlan.h

+7
Original file line numberDiff line numberDiff line change
@@ -3415,6 +3415,9 @@ class VPBasicBlock : public VPBlockBase {
34153415
/// second predecessor is the exiting block of the region.
34163416
const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
34173417

3418+
/// Returns true if the block is a loop header in a plain-CFG VPlan, i.e. it has exactly two predecessors and dominates the second one.
3419+
bool isHeader(const VPDominatorTree &VPDT) const;
3420+
34183421
protected:
34193422
/// Execute the recipes in the IR basic block \p BB.
34203423
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3566,6 +3569,10 @@ class VPRegionBlock : public VPBlockBase {
35663569
/// Clone all blocks in the single-entry single-exit region of the block and
35673570
/// their recipes without updating the operands of the cloned recipes.
35683571
VPRegionBlock *clone() override;
3572+
3573+
/// Remove the current region from its VPlan, connecting its predecessor to
3574+
/// its entry and its exiting block to its successor.
3575+
void removeRegion();
35693576
};
35703577

35713578
/// VPlan models a candidate for vectorization, encoding various decisions taken

0 commit comments

Comments
 (0)