Skip to content

Commit 6894aa5

Browse files
committed
[VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI).
!fixup update more tests.
1 parent 52d2b58 commit 6894aa5

13 files changed

+298
-277
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+19-16
Original file line number | Diff line number | Diff line change
@@ -2367,14 +2367,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
23672367
// If we just cloned a new assumption, add it to the assumption cache.
23682368
if (auto *II = dyn_cast<AssumeInst>(Cloned))
23692369
AC->registerAssumption(II);
2370-
2371-
assert(
2372-
(RepRecipe->getParent()->getParent() ||
2373-
!RepRecipe->getParent()->getPlan()->getVectorLoopRegion() ||
2374-
all_of(RepRecipe->operands(),
2375-
[](VPValue *Op) { return Op->isDefinedOutsideLoopRegions(); })) &&
2376-
"Expected a recipe is either within a region or all of its operands "
2377-
"are defined outside the vectorized region.");
23782370
}
23792371

23802372
Value *
@@ -2845,11 +2837,15 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28452837

28462838
// Don't apply optimizations below when no vector region remains, as they all
28472839
// require a vector loop at the moment.
2848-
if (!State.Plan->getVectorLoopRegion())
2840+
VPBasicBlock *HeaderVPBB = find_singleton<VPBasicBlock>(
2841+
vp_depth_first_shallow(State.Plan->getEntry()),
2842+
[&State](VPBlockBase *VPB, bool) {
2843+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
2844+
return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
2845+
});
2846+
if (!HeaderVPBB)
28492847
return;
28502848

2851-
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
2852-
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
28532849
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
28542850

28552851
// Remove redundant induction instructions.
@@ -2874,7 +2870,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
28742870
}
28752871

28762872
void InnerLoopVectorizer::fixNonInductionPHIs(VPTransformState &State) {
2877-
auto Iter = vp_depth_first_deep(Plan.getEntry());
2873+
auto Iter = vp_depth_first_shallow(Plan.getEntry());
28782874
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
28792875
for (VPRecipeBase &P : VPBB->phis()) {
28802876
VPWidenPHIRecipe *VPPhi = dyn_cast<VPWidenPHIRecipe>(&P);
@@ -7859,6 +7855,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
78597855
BestVPlan, BestVF,
78607856
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
78617857
VPlanTransforms::removeDeadRecipes(BestVPlan);
7858+
7859+
VPBasicBlock *MiddleVPBB =
7860+
BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr;
78627861
VPlanTransforms::convertToConcreteRecipes(BestVPlan,
78637862
*Legal->getWidestInductionType());
78647863

@@ -7972,14 +7971,19 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
79727971
// 2.6. Maintain Loop Hints
79737972
// Keep all loop hints from the original loop on the vector loop (we'll
79747973
// replace the vectorizer-specific hints below).
7975-
if (auto *LoopRegion = BestVPlan.getVectorLoopRegion()) {
7974+
auto BlockRange = vp_depth_first_shallow(BestVPlan.getEntry());
7975+
auto HeaderVPBBIter = find_if(BlockRange, [&State](VPBlockBase *VPB) {
7976+
auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
7977+
return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr;
7978+
});
7979+
if (HeaderVPBBIter != BlockRange.end()) {
7980+
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(*HeaderVPBBIter);
79767981
MDNode *OrigLoopID = OrigLoop->getLoopID();
79777982

79787983
std::optional<MDNode *> VectorizedLoopID =
79797984
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
79807985
LLVMLoopVectorizeFollowupVectorized});
79817986

7982-
VPBasicBlock *HeaderVPBB = LoopRegion->getEntryBasicBlock();
79837987
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
79847988
if (VectorizedLoopID) {
79857989
L->setLoopID(*VectorizedLoopID);
@@ -8005,8 +8009,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
80058009
ILV.printDebugTracesAtEnd();
80068010

80078011
// 4. Adjust branch weight of the branch in the middle block.
8008-
if (BestVPlan.getVectorLoopRegion()) {
8009-
auto *MiddleVPBB = BestVPlan.getMiddleBlock();
8012+
if (HeaderVPBBIter != BlockRange.end()) {
80108013
auto *MiddleTerm =
80118014
cast<BranchInst>(State.CFG.VPBB2IRBB[MiddleVPBB]->getTerminator());
80128015
if (MiddleTerm->isConditional() &&

llvm/lib/Transforms/Vectorize/VPlan.cpp

+109-85
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207207
return Parent->getEnclosingBlockWithPredecessors();
208208
}
209209

210+
bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
211+
return getPredecessors().size() == 2 &&
212+
VPDT.dominates(this, getPredecessors()[1]);
213+
}
214+
210215
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
211216
iterator It = begin();
212217
while (It != end() && It->isPhi())
@@ -351,8 +356,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
351356
}
352357

353358
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
354-
VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
355-
return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
359+
return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
356360
}
357361

358362
void VPTransformState::addNewMetadata(Instruction *To,
@@ -437,14 +441,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
437441
for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
438442
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
439443
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
440-
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
444+
BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
445+
if (!PredBB)
446+
continue;
441447

442448
assert(PredBB && "Predecessor basic-block not found building successor.");
443449
auto *PredBBTerminator = PredBB->getTerminator();
444450
LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
445451

446452
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
447453
if (isa<UnreachableInst>(PredBBTerminator)) {
454+
if (PredVPSuccessors.size() == 2)
455+
continue;
448456
assert(PredVPSuccessors.size() == 1 &&
449457
"Predecessor ending w/o branch must have single successor.");
450458
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -500,11 +508,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
500508
bool Replica = bool(State->Lane);
501509
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
502510

511+
if (isHeader(State->VPDT)) {
512+
// Create and register the new vector loop.
513+
Loop *PrevParentLoop = State->CurrentParentLoop;
514+
State->CurrentParentLoop = State->LI->AllocateLoop();
515+
516+
// Insert the new loop into the loop nest and register the new basic blocks
517+
// before calling any utilities such as SCEV that require valid LoopInfo.
518+
if (PrevParentLoop)
519+
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
520+
else
521+
State->LI->addTopLevelLoop(State->CurrentParentLoop);
522+
}
523+
503524
auto IsReplicateRegion = [](VPBlockBase *BB) {
504525
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
505-
return R && R->isReplicator();
526+
assert((!R || R->isReplicator()) &&
527+
"only replicate region blocks should remain");
528+
return R;
506529
};
507-
508530
// 1. Create an IR basic block.
509531
if ((Replica && this == getParent()->getEntry()) ||
510532
IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -527,6 +549,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
527549

528550
// 2. Fill the IR basic block with IR instructions.
529551
executeRecipes(State, NewBB);
552+
553+
// If this block is a latch, update CurrentParentLoop.
554+
if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
555+
auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
556+
return VPBB && VPBB->isHeader(State->VPDT) &&
557+
State->VPDT.dominates(Succ, this);
558+
}))
559+
State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
530560
}
531561

532562
VPBasicBlock *VPBasicBlock::clone() {
@@ -739,35 +769,13 @@ VPRegionBlock *VPRegionBlock::clone() {
739769
}
740770

741771
void VPRegionBlock::execute(VPTransformState *State) {
742-
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
743-
RPOT(Entry);
744-
745-
if (!isReplicator()) {
746-
// Create and register the new vector loop.
747-
Loop *PrevParentLoop = State->CurrentParentLoop;
748-
State->CurrentParentLoop = State->LI->AllocateLoop();
749-
750-
// Insert the new loop into the loop nest and register the new basic blocks
751-
// before calling any utilities such as SCEV that require valid LoopInfo.
752-
if (PrevParentLoop)
753-
PrevParentLoop->addChildLoop(State->CurrentParentLoop);
754-
else
755-
State->LI->addTopLevelLoop(State->CurrentParentLoop);
756-
757-
// Visit the VPBlocks connected to "this", starting from it.
758-
for (VPBlockBase *Block : RPOT) {
759-
LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
760-
Block->execute(State);
761-
}
762-
763-
State->CurrentParentLoop = PrevParentLoop;
764-
return;
765-
}
766-
772+
assert(isReplicator() &&
773+
"Loop regions should have been lowered to plain CFG");
767774
assert(!State->Lane && "Replicating a Region with non-null instance.");
768-
769-
// Enter replicating mode.
770775
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
776+
777+
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
778+
Entry);
771779
State->Lane = VPLane(0);
772780
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
773781
++Lane) {
@@ -842,6 +850,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
842850
}
843851
#endif
844852

853+
void VPRegionBlock::removeRegion() {
854+
auto *Header = cast<VPBasicBlock>(getEntry());
855+
VPBlockBase *Preheader = getSinglePredecessor();
856+
auto *Exiting = cast<VPBasicBlock>(getExiting());
857+
858+
VPBlockBase *Middle = getSingleSuccessor();
859+
VPBlockUtils::disconnectBlocks(Preheader, this);
860+
VPBlockUtils::disconnectBlocks(this, Middle);
861+
862+
for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
863+
VPB->setParent(nullptr);
864+
865+
VPBlockUtils::connectBlocks(Preheader, Header);
866+
VPBlockUtils::connectBlocks(Exiting, Middle);
867+
}
868+
845869
VPlan::VPlan(Loop *L) {
846870
setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
847871
ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -951,57 +975,57 @@ void VPlan::execute(VPTransformState *State) {
951975
for (VPBlockBase *Block : RPOT)
952976
Block->execute(State);
953977

954-
State->CFG.DTU.flush();
955-
956-
auto *LoopRegion = getVectorLoopRegion();
957-
if (!LoopRegion)
958-
return;
959-
960-
VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
961-
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
962-
963978
// Fix the latch value of canonical, reduction and first-order recurrences
964979
// phis in the vector loop.
965-
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
966-
for (VPRecipeBase &R : Header->phis()) {
967-
// Skip phi-like recipes that generate their backedge values themselves.
968-
if (isa<VPWidenPHIRecipe>(&R))
980+
for (VPBasicBlock *Header :
981+
VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
982+
if (!Header->isHeader(State->VPDT))
969983
continue;
984+
for (VPRecipeBase &R : Header->phis()) {
985+
if (isa<VPWidenPHIRecipe>(&R))
986+
continue;
970987

971-
if (isa<VPWidenInductionRecipe>(&R)) {
972-
PHINode *Phi = nullptr;
973-
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
974-
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
975-
} else {
976-
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
977-
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
978-
"recipe generating only scalars should have been replaced");
979-
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
980-
Phi = cast<PHINode>(GEP->getPointerOperand());
988+
auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
989+
BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
990+
991+
if (isa<VPWidenInductionRecipe>(&R)) {
992+
PHINode *Phi = nullptr;
993+
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
994+
Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
995+
} else {
996+
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
997+
assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
998+
"recipe generating only scalars should have been replaced");
999+
auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
1000+
Phi = cast<PHINode>(GEP->getPointerOperand());
1001+
}
1002+
1003+
Phi->setIncomingBlock(1, VectorLatchBB);
1004+
1005+
// Move the last step to the end of the latch block. This ensures
1006+
// consistent placement of all induction updates.
1007+
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
1008+
Inc->moveBefore(
1009+
std::prev(VectorLatchBB->getTerminator()->getIterator()));
1010+
1011+
// Use the steps for the last part as backedge value for the induction.
1012+
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
1013+
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
1014+
continue;
9811015
}
9821016

983-
Phi->setIncomingBlock(1, VectorLatchBB);
984-
985-
// Move the last step to the end of the latch block. This ensures
986-
// consistent placement of all induction updates.
987-
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
988-
Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
989-
990-
// Use the steps for the last part as backedge value for the induction.
991-
if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
992-
Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
993-
continue;
1017+
auto *PhiR = cast<VPSingleDefRecipe>(&R);
1018+
// VPInstructions currently model scalar Phis only.
1019+
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1020+
(isa<VPReductionPHIRecipe>(PhiR) &&
1021+
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1022+
1023+
Value *Phi = State->get(PhiR, NeedsScalar);
1024+
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1025+
// not.
1026+
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1027+
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
9941028
}
995-
996-
auto *PhiR = cast<VPSingleDefRecipe>(&R);
997-
// VPInstructions currently model scalar Phis only.
998-
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
999-
(isa<VPReductionPHIRecipe>(PhiR) &&
1000-
cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
1001-
Value *Phi = State->get(PhiR, NeedsScalar);
1002-
// VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1003-
Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
1004-
cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
10051029
}
10061030
}
10071031

@@ -1360,16 +1384,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
13601384

13611385
#endif
13621386

1363-
/// Returns true if there is a vector loop region and \p VPV is defined in a
1364-
/// loop region.
1365-
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
1366-
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
1367-
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
1368-
DefR->getParent()->getEnclosingLoopRegion());
1369-
}
1370-
13711387
bool VPValue::isDefinedOutsideLoopRegions() const {
1372-
return !isDefinedInsideLoopRegions(this);
1388+
auto *DefR = getDefiningRecipe();
1389+
if (!DefR)
1390+
return true;
1391+
1392+
const VPBasicBlock *DefVPBB = DefR->getParent();
1393+
auto *Plan = DefVPBB->getPlan();
1394+
if (Plan->getVectorLoopRegion())
1395+
return !DefR->getParent()->getEnclosingLoopRegion();
1396+
return DefVPBB == Plan->getEntry();
13731397
}
13741398
void VPValue::replaceAllUsesWith(VPValue *New) {
13751399
replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });

llvm/lib/Transforms/Vectorize/VPlan.h

+6
Original file line number | Diff line number | Diff line change
@@ -3324,6 +3324,8 @@ class VPBasicBlock : public VPBlockBase {
33243324
/// the cloned recipes.
33253325
VPBasicBlock *clone() override;
33263326

3327+
bool isHeader(const VPDominatorTree &VPDT) const;
3328+
33273329
protected:
33283330
/// Execute the recipes in the IR basic block \p BB.
33293331
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3470,6 +3472,10 @@ class VPRegionBlock : public VPBlockBase {
34703472
/// Clone all blocks in the single-entry single-exit region of the block and
34713473
/// their recipes without updating the operands of the cloned recipes.
34723474
VPRegionBlock *clone() override;
3475+
3476+
/// Remove the current region from its VPlan, connecting its predecessor to
3477+
/// its entry and exiting block to its successor.
3478+
void removeRegion();
34733479
};
34743480

34753481
/// VPlan models a candidate for vectorization, encoding various decisions take

0 commit comments

Comments
 (0)