Skip to content

Commit d31b3f6

Browse files
committed
[VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI).
1 parent 870f753 commit d31b3f6

File tree

6 files changed

+165
-109
lines changed

6 files changed

+165
-109
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+16-13
Original file line number | Diff line number | Diff line change
@@ -2433,12 +2433,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
24332433
// End if-block.
24342434
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
24352435
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
2436-
assert(
2437-
(Parent || !RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
2438-
all_of(RepRecipe->operands(),
2439-
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
2440-
"Expected a recipe is either within a region or all of its operands "
2441-
"are defined outside the vectorized region.");
24422436
if (IfPredicateInstr)
24432437
PredicatedInstructions.push_back(Cloned);
24442438
}
@@ -2947,14 +2941,18 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
29472941

29482942
// Don't apply optimizations below when no vector region remains, as they all
29492943
// require a vector loop at the moment.
2950-
if (!State.Plan->getVectorLoopRegion())
2944+
VPBasicBlock *HeaderVPBB = find_singleton<VPBasicBlock>(
2945+
vp_depth_first_shallow(State.Plan->getEntry()),
2946+
[&State](VPBlockBase *VPB, bool) {
2947+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
2948+
return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
2949+
});
2950+
if (!HeaderVPBB)
29512951
return;
29522952

29532953
for (Instruction *PI : PredicatedInstructions)
29542954
sinkScalarOperands(&*PI);
29552955

2956-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
2957-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
29582956
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
29592957

29602958
// Remove redundant induction instructions.
@@ -3055,7 +3053,7 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
30553053
}
30563054

30573055
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
3058-
auto Iter = vp_depth_first_deep(Plan.getEntry());
3056+
auto Iter = vp_depth_first_shallow(Plan.getEntry());
30593057
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
30603058
for (VPRecipeBase &P : VPBB->phis()) {
30613059
VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
@@ -7812,14 +7810,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78127810
// 2.6. Maintain Loop Hints
78137811
// Keep all loop hints from the original loop on the vector loop (we'll
78147812
// replace the vectorizer-specific hints below).
7815-
if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
7813+
auto BlockRange = vp_depth_first_shallow(BestVPlan.getEntry());
7814+
auto HeaderVPBBIter = find_if(BlockRange, [&State](VPBlockBase *VPB) {
7815+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
7816+
return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
7817+
});
7818+
if (HeaderVPBBIter != BlockRange.end()) {
7819+
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(*HeaderVPBBIter);
78167820
MDNode *OrigLoopID = OrigLoop->getLoopID();
78177821

78187822
std::optional<MDNode *> VectorizedLoopID =
78197823
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
78207824
LLVMLoopVectorizeFollowupVectorized});
78217825

7822-
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
78237826
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
78247827
if (VectorizedLoopID) {
78257828
L->setLoopID(*VectorizedLoopID);
@@ -7845,7 +7848,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78457848
ILV.printDebugTracesAtEnd();
78467849

78477850
// 4. Adjust branch weight of the branch in the middle block.
7848-
if (BestVPlan.getVectorLoopRegion()) {
7851+
if (HeaderVPBBIter != BlockRange.end()) {
78497852
auto *MiddleVPBB = BestVPlan.getMiddleBlock();
78507853
auto *MiddleTerm =
78517854
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());

llvm/lib/Transforms/Vectorize/VPlan.cpp

+109-85
Original file line number | Diff line number | Diff line change
@@ -208,6 +208,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
208208
return Parent->getEnclosingBlockWithPredecessors();
209209
}
210210

211+
/// Returns true if this block has exactly two predecessors and, according to
/// \p VPDT, dominates the second one (index 1 of getPredecessors()).
/// Elsewhere in this change the second predecessor of such a block is used as
/// the latch and the first one as the preheader, so the predecessor order
/// matters for this check.
bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
212+
return getPredecessors().size() == 2 &&
213+
VPDT.dominates(this, getPredecessors()[1]);
214+
}
215+
211216
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
212217
iterator It = begin();
213218
while (It != end() && It->isPhi())
@@ -352,8 +357,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
352357
}
353358

354359
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
355-
VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
356-
return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
360+
return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
357361
}
358362

359363
void VPTransformState::addNewMetadata(Instruction *To,
@@ -436,14 +440,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
436440
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
437441
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
438442
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
439-
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
443+
BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
444+
if (!PredBB)
445+
continue;
440446

441447
assert(PredBB && "Predecessor basic-block not found building successor.");
442448
auto *PredBBTerminator = PredBB->getTerminator();
443449
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
444450

445451
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
446452
if (isa<UnreachableInst>(PredBBTerminator)) {
453+
if (PredVPSuccessors.size() == 2)
454+
continue;
447455
assert(PredVPSuccessors.size() == 1 &&
448456
"Predecessor ending w/o branch must have single successor.");
449457
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -499,11 +507,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
499507
bool Replica = bool(State->Lane);
500508
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
501509

510+
if (isHeader(State->VPDT)) {
511+
// Create and register the new vector loop.
512+
Loop *PrevParentLoop = State->CurrentParentLoop;
513+
State->CurrentParentLoop = State->LI->AllocateLoop();
514+
515+
// Insert the new loop into the loop nest and register the new basic blocks
516+
// before calling any utilities such as SCEV that require valid LoopInfo.
517+
if (PrevParentLoop)
518+
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
519+
else
520+
State->LI->addTopLevelLoop(State->CurrentParentLoop);
521+
}
522+
502523
auto IsReplicateRegion = [](VPBlockBase *BB) {
503524
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
504-
return R && R->isReplicator();
525+
assert((!R || R->isReplicator()) &&
526+
"only replicate region blocks should remain");
527+
return R;
505528
};
506-
507529
// 1. Create an IR basic block.
508530
if ((Replica && this == getParent()->getEntry()) ||
509531
IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -537,6 +559,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
537559

538560
// 2. Fill the IR basic block with IR instructions.
539561
executeRecipes(State, NewBB);
562+
563+
// If this block is a latch, update CurrentParentLoop.
564+
if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
565+
auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
566+
return VPBB && VPBB->isHeader(State->VPDT) &&
567+
State->VPDT.dominates(Succ, this);
568+
}))
569+
State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
540570
}
541571

542572
VPBasicBlock *VPBasicBlock::clone() {
@@ -747,35 +777,13 @@ VPRegionBlock *VPRegionBlock::clone() {
747777
}
748778

749779
void VPRegionBlock::execute(VPTransformState *State) {
750-
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
751-
RPOT(Entry);
752-
753-
if (!isReplicator()) {
754-
// Create and register the new vector loop.
755-
Loop *PrevParentLoop = State->CurrentParentLoop;
756-
State->CurrentParentLoop = State->LI->AllocateLoop();
757-
758-
// Insert the new loop into the loop nest and register the new basic blocks
759-
// before calling any utilities such as SCEV that require valid LoopInfo.
760-
if (PrevParentLoop)
761-
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
762-
else
763-
State->LI->addTopLevelLoop(State->CurrentParentLoop);
764-
765-
// Visit the VPBlocks connected to "this", starting from it.
766-
for (VPBlockBase *Block : RPOT) {
767-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
768-
Block->execute(State);
769-
}
770-
771-
State->CurrentParentLoop = PrevParentLoop;
772-
return;
773-
}
774-
780+
assert(isReplicator() &&
781+
"Loop regions should have been lowered to plain CFG");
775782
assert(!State->Lane && "Replicating a Region with non-null instance.");
776-
777-
// Enter replicating mode.
778783
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
784+
785+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
786+
Entry);
779787
State->Lane = VPLane(0);
780788
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
781789
++Lane) {
@@ -850,6 +858,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
850858
}
851859
#endif
852860

861+
/// Replace this region by its contents: unlink the region block from its
/// single predecessor and single successor, clear the parent of the blocks
/// reachable from Entry, and connect predecessor -> entry block and
/// exiting block -> successor directly.
/// NOTE(review): assumes the region has exactly one predecessor and one
/// successor; what getSinglePredecessor()/getSingleSuccessor() return
/// otherwise is not visible here -- confirm.
/// NOTE(review): no edge from the exiting block back to the entry block is
/// added here; code that relies on VPBasicBlock::isHeader() (which requires
/// two predecessors) presumably gets that edge from elsewhere -- confirm.
void VPRegionBlock::removeRegion() {
862+
// Both casts require the entry and exiting blocks to be VPBasicBlocks.
auto *Header = cast<VPBasicBlock>(getEntry());
863+
VPBlockBase *Preheader = getSinglePredecessor();
864+
auto *Exiting = cast<VPBasicBlock>(getExiting());
865+
866+
VPBlockBase *Middle = getSingleSuccessor();
867+
// Unlink the region block itself from its two neighbours.
VPBlockUtils::disconnectBlocks(Preheader, this);
868+
VPBlockUtils::disconnectBlocks(this, Middle);
869+
870+
// Clear the parent of every block visited by a shallow depth-first walk
// starting at Entry, so they no longer refer to this region.
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
871+
VPB->setParent(nullptr);
872+
873+
// Wire the former region contents straight into the surrounding CFG.
VPBlockUtils::connectBlocks(Preheader, Header);
874+
VPBlockUtils::connectBlocks(Exiting, Middle);
875+
}
876+
853877
VPlan::VPlan(Loop *L) {
854878
setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
855879
ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -961,57 +985,57 @@ void VPlan::execute(VPTransformState *State) {
961985
for (VPBlockBase *Block : RPOT)
962986
Block->execute(State);
963987

964-
State->CFG.DTU.flush();
965-
966-
auto *LoopRegion = getVectorLoopRegion();
967-
if (!LoopRegion)
968-
return;
969-
970-
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
971-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
972-
973988
// Fix the latch value of canonical, reduction and first-order recurrences
974989
// phis in the vector loop.
975-
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
976-
for (VPRecipeBase &R : Header->phis()) {
977-
// Skip phi-like recipes that generate their backedge values themselves.
978-
if (isa<VPWidenPHIRecipe>(&R))
990+
for (VPBasicBlock *Header :
991+
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
992+
if (!Header->isHeader(State->VPDT))
979993
continue;
994+
for (VPRecipeBase &R : Header->phis()) {
995+
if (isa<VPWidenPHIRecipe>(&R))
996+
continue;
980997

981-
if (isa<VPWidenInductionRecipe>(&R)) {
982-
PHINode *Phi = nullptr;
983-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
984-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
985-
} else {
986-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
987-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
988-
"recipe generating only scalars should have been replaced");
989-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
990-
Phi = cast<PHINode>(GEP->getPointerOperand());
998+
auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
999+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
1000+
1001+
if (isa<VPWidenInductionRecipe>(&R)) {
1002+
PHINode *Phi = nullptr;
1003+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
1004+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
1005+
} else {
1006+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
1007+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
1008+
"recipe generating only scalars should have been replaced");
1009+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1010+
Phi = cast<PHINode>(GEP->getPointerOperand());
1011+
}
1012+
1013+
Phi->setIncomingBlock(1, VectorLatchBB);
1014+
1015+
// Move the last step to the end of the latch block. This ensures
1016+
// consistent placement of all induction updates.
1017+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1018+
Inc->moveBefore(
1019+
std::prev(VectorLatchBB->getTerminator()->getIterator()));
1020+
1021+
// Use the steps for the last part as backedge value for the induction.
1022+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1023+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1024+
continue;
9911025
}
9921026

993-
Phi->setIncomingBlock(1, VectorLatchBB);
994-
995-
// Move the last step to the end of the latch block. This ensures
996-
// consistent placement of all induction updates.
997-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
998-
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
999-
1000-
// Use the steps for the last part as backedge value for the induction.
1001-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1002-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1003-
continue;
1027+
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1028+
// VPInstructions currently model scalar Phis only.
1029+
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1030+
(isa<VPReductionPHIRecipe>(PhiR) &&
1031+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1032+
1033+
Value *Phi = State->get(PhiR, NeedsScalar);
1034+
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1035+
// not.
1036+
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1037+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10041038
}
1005-
1006-
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1007-
// VPInstructions currently model scalar Phis only.
1008-
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1009-
(isa<VPReductionPHIRecipe>(PhiR) &&
1010-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1011-
Value *Phi = State->get(PhiR, NeedsScalar);
1012-
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1013-
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1014-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10151039
}
10161040
}
10171041

@@ -1363,16 +1387,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
13631387

13641388
#endif
13651389

1366-
/// Returns true if there is a vector loop region and \p VPV is defined in a
1367-
/// loop region.
1368-
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1369-
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1370-
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1371-
DefR->getParent()->getEnclosingLoopRegion());
1372-
}
1373-
13741390
bool VPValue::isDefinedOutsideLoopRegions() const {
1375-
return !isDefinedInsideLoopRegions(this);
1391+
auto *DefR = getDefiningRecipe();
1392+
if (!DefR)
1393+
return true;
1394+
1395+
const VPBasicBlock *DefVPBB = DefR->getParent();
1396+
auto *Plan = DefVPBB->getPlan();
1397+
if (Plan->getVectorLoopRegion())
1398+
return !DefR->getParent()->getEnclosingLoopRegion();
1399+
return DefVPBB == Plan->getEntry();
13761400
}
13771401
void VPValue::replaceAllUsesWith(VPValue *New) {
13781402
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });

llvm/lib/Transforms/Vectorize/VPlan.h

+7
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ class SCEV;
5757
class Type;
5858
class VPBasicBlock;
5959
class VPBuilder;
60+
class VPDominatorTree;
6061
class VPRegionBlock;
6162
class VPlan;
6263
class VPLane;
@@ -3221,6 +3222,8 @@ class VPBasicBlock : public VPBlockBase {
32213222
/// the cloned recipes.
32223223
VPBasicBlock *clone() override;
32233224

3225+
bool isHeader(const VPDominatorTree &VPDT) const;
3226+
32243227
protected:
32253228
/// Execute the recipes in the IR basic block \p BB.
32263229
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3367,6 +3370,10 @@ class VPRegionBlock : public VPBlockBase {
33673370
/// Clone all blocks in the single-entry single-exit region of the block and
33683371
/// their recipes without updating the operands of the cloned recipes.
33693372
VPRegionBlock *clone() override;
3373+
3374+
/// Remove the current region from its VPlan, connecting its predecessor to
3375+
/// its entry and exiting block to its successor.
3376+
void removeRegion();
33703377
};
33713378

33723379
/// VPlan models a candidate for vectorization, encoding various decisions take

0 commit comments

Comments
 (0)