-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[VPlan] Replace VPRegionBlock with explicit CFG before execute (NFCI). #117506
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesBuilding on top of #114305, Patch is 25.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117506.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d13770a35c108f..ec48f90ba4fcca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2368,12 +2368,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// End if-block.
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
- assert((Parent || all_of(RepRecipe->operands(),
- [](VPValue *Op) {
- return Op->isDefinedOutsideLoopRegions();
- })) &&
- "Expected a recipe is either within a region or all of its operands "
- "are defined outside the vectorized region.");
if (IfPredicateInstr)
PredicatedInstructions.push_back(Cloned);
}
@@ -2969,8 +2963,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
- VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
- VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+ State.Plan->getVectorPreheader()->getSingleSuccessor());
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
// Remove redundant induction instructions.
@@ -7742,6 +7736,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan.prepareToExecute(ILV.getTripCount(),
ILV.getOrCreateVectorTripCount(nullptr),
CanonicalIVStartValue, State);
+ VPlanTransforms::prepareToExecute(BestVPlan);
BestVPlan.execute(&State);
@@ -7763,7 +7758,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
LLVMLoopVectorizeFollowupVectorized});
VPBasicBlock *HeaderVPBB =
- BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
+ cast<VPBasicBlock>(BestVPlan.getVectorPreheader()->getSingleSuccessor());
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
if (VectorizedLoopID)
L->setLoopID(*VectorizedLoopID);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 529108a5aaa97f..a3e11c7f2f5f8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -352,8 +352,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
}
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
- VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
- return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+
+ return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
}
void VPTransformState::addNewMetadata(Instruction *To,
@@ -425,6 +425,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+ if (!PredBB)
+ continue;
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
@@ -432,6 +434,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
if (isa<UnreachableInst>(PredBBTerminator)) {
+ if (PredVPSuccessors.size() == 2)
+ continue;
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -480,6 +484,21 @@ void VPBasicBlock::execute(VPTransformState *State) {
bool Replica = bool(State->Lane);
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
+ if (isHeader()) {
+ // Create and register the new vector loop.
+ State->CurrentVectorLoop = State->LI->AllocateLoop();
+ BasicBlock *VectorPH =
+ State->CFG.VPBB2IRBB[cast<VPBasicBlock>(getPredecessors()[0])];
+ Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addChildLoop(State->CurrentVectorLoop);
+ else
+ State->LI->addTopLevelLoop(State->CurrentVectorLoop);
+ }
+
auto IsReplicateRegion = [](VPBlockBase *BB) {
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
return R && R->isReplicator();
@@ -718,37 +737,13 @@ void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
}
void VPRegionBlock::execute(VPTransformState *State) {
- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
- RPOT(Entry);
-
- if (!isReplicator()) {
- // Create and register the new vector loop.
- Loop *PrevLoop = State->CurrentVectorLoop;
- State->CurrentVectorLoop = State->LI->AllocateLoop();
- BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
- Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
-
- // Insert the new loop into the loop nest and register the new basic blocks
- // before calling any utilities such as SCEV that require valid LoopInfo.
- if (ParentLoop)
- ParentLoop->addChildLoop(State->CurrentVectorLoop);
- else
- State->LI->addTopLevelLoop(State->CurrentVectorLoop);
-
- // Visit the VPBlocks connected to "this", starting from it.
- for (VPBlockBase *Block : RPOT) {
- LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
- Block->execute(State);
- }
-
- State->CurrentVectorLoop = PrevLoop;
- return;
- }
-
+ assert(isReplicator() &&
+ "Loop regions should have been lowered to plain CFG");
assert(!State->Lane && "Replicating a Region with non-null instance.");
-
- // Enter replicating mode.
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
State->Lane = VPLane(0);
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
@@ -823,6 +818,26 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPRegionBlock::removeRegion() {
+ auto *Header = cast<VPBasicBlock>(getEntry());
+ VPBlockBase *Preheader = getSinglePredecessor();
+ auto *Exiting = cast<VPBasicBlock>(getExiting());
+
+ VPBlockBase *Middle = getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(Preheader, this);
+ VPBlockUtils::disconnectBlocks(this, Middle);
+
+ for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
+ VPB->setParent(nullptr);
+
+ VPBlockUtils::connectBlocks(Preheader, Header);
+ VPBlockUtils::connectBlocks(Exiting, Middle);
+
+ // Set LoopRegion's Entry to nullptr, as the CFG from LoopRegion shouldn't
+ // be deleted when the region is deleted.
+ Entry = nullptr;
+}
+
VPlan::~VPlan() {
if (Entry) {
VPValue DummyValue;
@@ -1032,51 +1047,55 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
Block->execute(State);
- VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
- BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
- VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
- for (VPRecipeBase &R : Header->phis()) {
- // Skip phi-like recipes that generate their backedege values themselves.
- if (isa<VPWidenPHIRecipe>(&R))
+ for (VPBasicBlock *Header :
+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
+ if (!Header->isHeader())
continue;
+ for (VPRecipeBase &R : Header->phis()) {
+ VPBasicBlock *LatchVPBB =
+ cast<VPBasicBlock>(Header->getPredecessors()[1]);
+ BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- PHINode *Phi = nullptr;
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
- } else {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
- Phi = cast<PHINode>(GEP->getPointerOperand());
- }
-
- Phi->setIncomingBlock(1, VectorLatchBB);
+ // Skip phi-like recipes that generate their backedege values themselves.
+ if (isa<VPWidenPHIRecipe>(&R))
+ continue;
- // Move the last step to the end of the latch block. This ensures
- // consistent placement of all induction updates.
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ PHINode *Phi = nullptr;
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
+ } else {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+ "recipe generating only scalars should have been replaced");
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
+ }
+
+ Phi->setIncomingBlock(1, VectorLatchBB);
+
+ // Move the last step to the end of the latch block. This ensures
+ // consistent placement of all induction updates.
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+ Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+
+ // Use the steps for the last part as backedge value for the induction.
+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
+ Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
+ continue;
+ }
- // Use the steps for the last part as backedge value for the induction.
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
- Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
- continue;
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
+ bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+ Value *Phi = State->get(PhiR, NeedsScalar);
+ Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
+ cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
-
- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
- bool NeedsScalar =
- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
- Value *Phi = State->get(PhiR, NeedsScalar);
- Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
- cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
State->CFG.DTU.flush();
@@ -1418,8 +1437,13 @@ void VPlanIngredient::print(raw_ostream &O) const {
#endif
bool VPValue::isDefinedOutsideLoopRegions() const {
- return !hasDefiningRecipe() ||
- !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
+ auto *DefR = getDefiningRecipe();
+ if (!DefR)
+ return true;
+
+ const VPBasicBlock *DefVPBB = DefR->getParent();
+ auto *Plan = DefVPBB->getPlan();
+ return DefVPBB == Plan->getPreheader() || DefVPBB == Plan->getEntry();
}
void VPValue::replaceAllUsesWith(VPValue *New) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6f7c5c3580114d..58e57db618ddd8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2239,6 +2239,45 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
#endif
};
+/// Recipe to generate a scalar PHI. Used to generate code for recipes that
+/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
+/// VPEVLBasedIVPHIRecipe.
+class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
+ std::string Name;
+
+public:
+ VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
+ StringRef Name)
+ : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
+ Name(Name.str()) {
+ addOperand(BackedgeValue);
+ }
+
+ ~VPScalarPHIRecipe() override = default;
+
+ VPScalarPHIRecipe *clone() override {
+ llvm_unreachable("cloning not implemented yet");
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
+
+ /// Generate the phi/select nodes.
+ void execute(VPTransformState &State) override;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe for handling phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
@@ -3134,8 +3173,10 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
}
- /// Generate the canonical scalar induction phi of the vector loop.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
@@ -3231,9 +3272,10 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
}
- /// Generate phi for handling IV based on EVL over iterations correctly.
- /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
/// Return the cost of this VPEVLBasedIVPHIRecipe.
InstructionCost computeCost(ElementCount VF,
@@ -3543,6 +3585,8 @@ class VPBasicBlock : public VPBlockBase {
return NewBlock;
}
+ bool isHeader() { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); }
+
protected:
/// Execute the recipes in the IR basic block \p BB.
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3700,6 +3744,10 @@ class VPRegionBlock : public VPBlockBase {
/// Clone all blocks in the single-entry single-exit region of the block and
/// their recipes without updating the operands of the cloned recipes.
VPRegionBlock *clone() override;
+
+ /// Remove the current region from its VPlan, connecting its predecessor to
+ /// its entry and exiting block to its successor.
+ void removeRegion();
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -3833,10 +3881,10 @@ class VPlan {
/// whether to execute the scalar tail loop or the exit block from the loop
/// latch.
const VPBasicBlock *getMiddleBlock() const {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
}
VPBasicBlock *getMiddleBlock() {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
}
/// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
@@ -3958,9 +4006,7 @@ class VPlan {
}
/// Returns the preheader of the vector loop region.
- VPBasicBlock *getVectorPreheader() {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
- }
+ VPBasicBlock *getVectorPreheader() { return cast<VPBasicBlock>(getEntry()); }
/// Returns the canonical induction recipe of the vector loop.
VPCanonicalIVPHIRecipe *getCanonicalIV() {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index cb42cfe8159b04..969d07b229e469 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -213,14 +213,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
- VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
- [this](const auto *R) {
- // Handle header phi recipes, except VPWidenIntOrFpInduction
- // which needs special handling due it being possibly truncated.
- // TODO: consider inferring/caching type of siblings, e.g.,
- // backedge value, here and in cases below.
- return inferScalarType(R->getStartValue());
- })
+ VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
+ VPScalarPHIRecipe>([this](const auto *R) {
+ // Handle header phi recipes, except VPWidenIntOrFpInduction
+ // which needs special handling due it being possibly truncated.
+ // TODO: consider inferring/caching type of siblings, e.g.,
+ // backedge value, here and in cases below.
+ return inferScalarType(R->getStartValue());
+ })
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b2ee31c3e240a1..dc8a4435818475 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -491,11 +491,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- if (!getParent()->isExiting())
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
+ if (!State.CFG.VPBB2IRBB.contains(Header))
return CondBr;
- VPRegionBlock *ParentRegion = getParent()->getParent();
- VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
return CondBr;
}
@@ -506,9 +505,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Cond = Builder.CreateICmpEQ(IV, TC);
// Now create the branch.
- auto *Plan = getParent()->getPlan();
- VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
- VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination (the header) now and to the
@@ -3099,17 +3096,6 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
VectorTy, std::nullopt, CostKind, 0);
}
-void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
- Value *Start = getStartValue()->getLiveInIRValue();
- PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");...
[truncated]
|
@llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesBuilding on top of #114305, Patch is 25.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117506.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d13770a35c108f..ec48f90ba4fcca 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2368,12 +2368,6 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
// End if-block.
VPRegionBlock *Parent = RepRecipe->getParent()->getParent();
bool IfPredicateInstr = Parent ? Parent->isReplicator() : false;
- assert((Parent || all_of(RepRecipe->operands(),
- [](VPValue *Op) {
- return Op->isDefinedOutsideLoopRegions();
- })) &&
- "Expected a recipe is either within a region or all of its operands "
- "are defined outside the vectorized region.");
if (IfPredicateInstr)
PredicatedInstructions.push_back(Cloned);
}
@@ -2969,8 +2963,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
for (Instruction *PI : PredicatedInstructions)
sinkScalarOperands(&*PI);
- VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
- VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
+ State.Plan->getVectorPreheader()->getSingleSuccessor());
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB];
// Remove redundant induction instructions.
@@ -7742,6 +7736,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
BestVPlan.prepareToExecute(ILV.getTripCount(),
ILV.getOrCreateVectorTripCount(nullptr),
CanonicalIVStartValue, State);
+ VPlanTransforms::prepareToExecute(BestVPlan);
BestVPlan.execute(&State);
@@ -7763,7 +7758,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
LLVMLoopVectorizeFollowupVectorized});
VPBasicBlock *HeaderVPBB =
- BestVPlan.getVectorLoopRegion()->getEntryBasicBlock();
+ cast<VPBasicBlock>(BestVPlan.getVectorPreheader()->getSingleSuccessor());
Loop *L = LI->getLoopFor(State.CFG.VPBB2IRBB[HeaderVPBB]);
if (VectorizedLoopID)
L->setLoopID(*VectorizedLoopID);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 529108a5aaa97f..a3e11c7f2f5f8d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -352,8 +352,8 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
}
BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
- VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
- return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+
+ return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
}
void VPTransformState::addNewMetadata(Instruction *To,
@@ -425,6 +425,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+ if (!PredBB)
+ continue;
assert(PredBB && "Predecessor basic-block not found building successor.");
auto *PredBBTerminator = PredBB->getTerminator();
@@ -432,6 +434,8 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) {
auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
if (isa<UnreachableInst>(PredBBTerminator)) {
+ if (PredVPSuccessors.size() == 2)
+ continue;
assert(PredVPSuccessors.size() == 1 &&
"Predecessor ending w/o branch must have single successor.");
DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -480,6 +484,21 @@ void VPBasicBlock::execute(VPTransformState *State) {
bool Replica = bool(State->Lane);
BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
+ if (isHeader()) {
+ // Create and register the new vector loop.
+ State->CurrentVectorLoop = State->LI->AllocateLoop();
+ BasicBlock *VectorPH =
+ State->CFG.VPBB2IRBB[cast<VPBasicBlock>(getPredecessors()[0])];
+ Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
+
+ // Insert the new loop into the loop nest and register the new basic blocks
+ // before calling any utilities such as SCEV that require valid LoopInfo.
+ if (ParentLoop)
+ ParentLoop->addChildLoop(State->CurrentVectorLoop);
+ else
+ State->LI->addTopLevelLoop(State->CurrentVectorLoop);
+ }
+
auto IsReplicateRegion = [](VPBlockBase *BB) {
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
return R && R->isReplicator();
@@ -718,37 +737,13 @@ void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
}
void VPRegionBlock::execute(VPTransformState *State) {
- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
- RPOT(Entry);
-
- if (!isReplicator()) {
- // Create and register the new vector loop.
- Loop *PrevLoop = State->CurrentVectorLoop;
- State->CurrentVectorLoop = State->LI->AllocateLoop();
- BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()];
- Loop *ParentLoop = State->LI->getLoopFor(VectorPH);
-
- // Insert the new loop into the loop nest and register the new basic blocks
- // before calling any utilities such as SCEV that require valid LoopInfo.
- if (ParentLoop)
- ParentLoop->addChildLoop(State->CurrentVectorLoop);
- else
- State->LI->addTopLevelLoop(State->CurrentVectorLoop);
-
- // Visit the VPBlocks connected to "this", starting from it.
- for (VPBlockBase *Block : RPOT) {
- LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
- Block->execute(State);
- }
-
- State->CurrentVectorLoop = PrevLoop;
- return;
- }
-
+ assert(isReplicator() &&
+ "Loop regions should have been lowered to plain CFG");
assert(!State->Lane && "Replicating a Region with non-null instance.");
-
- // Enter replicating mode.
assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+ Entry);
State->Lane = VPLane(0);
for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
++Lane) {
@@ -823,6 +818,26 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
}
#endif
+void VPRegionBlock::removeRegion() {
+ auto *Header = cast<VPBasicBlock>(getEntry());
+ VPBlockBase *Preheader = getSinglePredecessor();
+ auto *Exiting = cast<VPBasicBlock>(getExiting());
+
+ VPBlockBase *Middle = getSingleSuccessor();
+ VPBlockUtils::disconnectBlocks(Preheader, this);
+ VPBlockUtils::disconnectBlocks(this, Middle);
+
+ for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
+ VPB->setParent(nullptr);
+
+ VPBlockUtils::connectBlocks(Preheader, Header);
+ VPBlockUtils::connectBlocks(Exiting, Middle);
+
+ // Set LoopRegion's Entry to nullptr, as the CFG from LoopRegion shouldn't
+ // be deleted when the region is deleted.
+ Entry = nullptr;
+}
+
VPlan::~VPlan() {
if (Entry) {
VPValue DummyValue;
@@ -1032,51 +1047,55 @@ void VPlan::execute(VPTransformState *State) {
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
Block->execute(State);
- VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
- BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
// Fix the latch value of canonical, reduction and first-order recurrences
// phis in the vector loop.
- VPBasicBlock *Header = getVectorLoopRegion()->getEntryBasicBlock();
- for (VPRecipeBase &R : Header->phis()) {
- // Skip phi-like recipes that generate their backedege values themselves.
- if (isa<VPWidenPHIRecipe>(&R))
+ for (VPBasicBlock *Header :
+ VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
+ if (!Header->isHeader())
continue;
+ for (VPRecipeBase &R : Header->phis()) {
+ VPBasicBlock *LatchVPBB =
+ cast<VPBasicBlock>(Header->getPredecessors()[1]);
+ BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
- if (isa<VPWidenPointerInductionRecipe>(&R) ||
- isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- PHINode *Phi = nullptr;
- if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
- Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
- } else {
- auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
- assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
- "recipe generating only scalars should have been replaced");
- auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
- Phi = cast<PHINode>(GEP->getPointerOperand());
- }
-
- Phi->setIncomingBlock(1, VectorLatchBB);
+ // Skip phi-like recipes that generate their backedege values themselves.
+ if (isa<VPWidenPHIRecipe>(&R))
+ continue;
- // Move the last step to the end of the latch block. This ensures
- // consistent placement of all induction updates.
- Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
- Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+ if (isa<VPWidenPointerInductionRecipe>(&R) ||
+ isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ PHINode *Phi = nullptr;
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+ Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
+ } else {
+ auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+ assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+ "recipe generating only scalars should have been replaced");
+ auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+ Phi = cast<PHINode>(GEP->getPointerOperand());
+ }
+
+ Phi->setIncomingBlock(1, VectorLatchBB);
+
+ // Move the last step to the end of the latch block. This ensures
+ // consistent placement of all induction updates.
+ Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+ Inc->moveBefore(VectorLatchBB->getTerminator()->getPrevNode());
+
+ // Use the steps for the last part as backedge value for the induction.
+ if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
+ Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
+ continue;
+ }
- // Use the steps for the last part as backedge value for the induction.
- if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
- Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
- continue;
+ auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
+ bool NeedsScalar = isa<VPScalarPHIRecipe>(PhiR) ||
+ (isa<VPReductionPHIRecipe>(PhiR) &&
+ cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+ Value *Phi = State->get(PhiR, NeedsScalar);
+ Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
+ cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
-
- auto *PhiR = cast<VPHeaderPHIRecipe>(&R);
- bool NeedsScalar =
- isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
- (isa<VPReductionPHIRecipe>(PhiR) &&
- cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
- Value *Phi = State->get(PhiR, NeedsScalar);
- Value *Val = State->get(PhiR->getBackedgeValue(), NeedsScalar);
- cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
}
State->CFG.DTU.flush();
@@ -1418,8 +1437,13 @@ void VPlanIngredient::print(raw_ostream &O) const {
#endif
bool VPValue::isDefinedOutsideLoopRegions() const {
- return !hasDefiningRecipe() ||
- !getDefiningRecipe()->getParent()->getEnclosingLoopRegion();
+ auto *DefR = getDefiningRecipe();
+ if (!DefR)
+ return true;
+
+ const VPBasicBlock *DefVPBB = DefR->getParent();
+ auto *Plan = DefVPBB->getPlan();
+ return DefVPBB == Plan->getPreheader() || DefVPBB == Plan->getEntry();
}
void VPValue::replaceAllUsesWith(VPValue *New) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 6f7c5c3580114d..58e57db618ddd8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -2239,6 +2239,45 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe,
#endif
};
+/// Recipe to generate a scalar PHI. Used to generate code for recipes that
+/// produce scalar header phis, including VPCanonicalIVPHIRecipe and
+/// VPEVLBasedIVPHIRecipe.
+class VPScalarPHIRecipe : public VPHeaderPHIRecipe {
+ std::string Name;
+
+public:
+ VPScalarPHIRecipe(VPValue *Start, VPValue *BackedgeValue, DebugLoc DL,
+ StringRef Name)
+ : VPHeaderPHIRecipe(VPDef::VPScalarPHISC, nullptr, Start, DL),
+ Name(Name.str()) {
+ addOperand(BackedgeValue);
+ }
+
+ ~VPScalarPHIRecipe() override = default;
+
+ VPScalarPHIRecipe *clone() override {
+ llvm_unreachable("cloning not implemented yet");
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPScalarPHISC)
+
+ /// Generate the phi/select nodes.
+ void execute(VPTransformState &State) override;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
/// A recipe for handling phis that are widened in the vector loop.
/// In the VPlan native path, all incoming VPValues & VPBasicBlock pairs are
/// managed in the recipe directly.
@@ -3134,8 +3173,10 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
return D->getVPDefID() == VPDef::VPCanonicalIVPHISC;
}
- /// Generate the canonical scalar induction phi of the vector loop.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
@@ -3231,9 +3272,10 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC;
}
- /// Generate phi for handling IV based on EVL over iterations correctly.
- /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe.
- void execute(VPTransformState &State) override;
+ void execute(VPTransformState &State) override {
+ llvm_unreachable(
+ "cannot execute this recipe, should be replaced by VPScalarPHIRecipe");
+ }
/// Return the cost of this VPEVLBasedIVPHIRecipe.
InstructionCost computeCost(ElementCount VF,
@@ -3543,6 +3585,8 @@ class VPBasicBlock : public VPBlockBase {
return NewBlock;
}
+ bool isHeader() { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); }
+
protected:
/// Execute the recipes in the IR basic block \p BB.
void executeRecipes(VPTransformState *State, BasicBlock *BB);
@@ -3700,6 +3744,10 @@ class VPRegionBlock : public VPBlockBase {
/// Clone all blocks in the single-entry single-exit region of the block and
/// their recipes without updating the operands of the cloned recipes.
VPRegionBlock *clone() override;
+
+ /// Remove the current region from its VPlan, connecting its predecessor to
+ /// its entry and exiting block to its successor.
+ void removeRegion();
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -3833,10 +3881,10 @@ class VPlan {
/// whether to execute the scalar tail loop or the exit block from the loop
/// latch.
const VPBasicBlock *getMiddleBlock() const {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
}
VPBasicBlock *getMiddleBlock() {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
+ return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor());
}
/// Return an iterator range over the VPIRBasicBlock wrapping the exit blocks
@@ -3958,9 +4006,7 @@ class VPlan {
}
/// Returns the preheader of the vector loop region.
- VPBasicBlock *getVectorPreheader() {
- return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor());
- }
+ VPBasicBlock *getVectorPreheader() { return cast<VPBasicBlock>(getEntry()); }
/// Returns the canonical induction recipe of the vector loop.
VPCanonicalIVPHIRecipe *getCanonicalIV() {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index cb42cfe8159b04..969d07b229e469 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -213,14 +213,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
TypeSwitch<const VPRecipeBase *, Type *>(V->getDefiningRecipe())
.Case<VPActiveLaneMaskPHIRecipe, VPCanonicalIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPReductionPHIRecipe,
- VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe>(
- [this](const auto *R) {
- // Handle header phi recipes, except VPWidenIntOrFpInduction
- // which needs special handling due it being possibly truncated.
- // TODO: consider inferring/caching type of siblings, e.g.,
- // backedge value, here and in cases below.
- return inferScalarType(R->getStartValue());
- })
+ VPWidenPointerInductionRecipe, VPEVLBasedIVPHIRecipe,
+ VPScalarPHIRecipe>([this](const auto *R) {
+ // Handle header phi recipes, except VPWidenIntOrFpInduction
+ // which needs special handling due it being possibly truncated.
+ // TODO: consider inferring/caching type of siblings, e.g.,
+ // backedge value, here and in cases below.
+ return inferScalarType(R->getStartValue());
+ })
.Case<VPWidenIntOrFpInductionRecipe, VPDerivedIVRecipe>(
[](const auto *R) { return R->getScalarType(); })
.Case<VPReductionRecipe, VPPredInstPHIRecipe, VPWidenPHIRecipe,
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index b2ee31c3e240a1..dc8a4435818475 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -491,11 +491,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- if (!getParent()->isExiting())
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
+ if (!State.CFG.VPBB2IRBB.contains(Header))
return CondBr;
- VPRegionBlock *ParentRegion = getParent()->getParent();
- VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();
CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);
return CondBr;
}
@@ -506,9 +505,7 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *Cond = Builder.CreateICmpEQ(IV, TC);
// Now create the branch.
- auto *Plan = getParent()->getPlan();
- VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
- VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();
+ VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]);
// Replace the temporary unreachable terminator with a new conditional
// branch, hooking it up to backward destination (the header) now and to the
@@ -3099,17 +3096,6 @@ InstructionCost VPInterleaveRecipe::computeCost(ElementCount VF,
VectorTy, std::nullopt, CostKind, 0);
}
-void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) {
- Value *Start = getStartValue()->getLiveInIRValue();
- PHINode *Phi = PHINode::Create(Start->getType(), 2, "index");...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
@@ -2969,8 +2963,8 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { | |||
for (Instruction *PI : PredicatedInstructions) | |||
sinkScalarOperands(&*PI); | |||
|
|||
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); | |||
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); | |||
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>( | |
auto *HeaderVPBB = cast<VPBasicBlock>( |
VPBasicBlock *HeaderVPBB = | ||
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock(); | ||
cast<VPBasicBlock>(BestVPlan.getVectorPreheader()->getSingleSuccessor()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBasicBlock *HeaderVPBB = | |
BestVPlan.getVectorLoopRegion()->getEntryBasicBlock(); | |
cast<VPBasicBlock>(BestVPlan.getVectorPreheader()->getSingleSuccessor()); | |
auto *HeaderVPBB = | |
cast<VPBasicBlock>(BestVPlan.getVectorPreheader()->getSingleSuccessor()); |
@@ -425,13 +425,17 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) { | |||
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); | |||
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); | |||
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This kind of accessing may create nullptr entry in CFG.VPBB2IRBB
. Is this ok?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to use lookup
, thanks~
VPBasicBlock *LatchVPBB = | ||
cast<VPBasicBlock>(Header->getPredecessors()[1]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBasicBlock *LatchVPBB = | |
cast<VPBasicBlock>(Header->getPredecessors()[1]); | |
auto *LatchVPBB = | |
cast<VPBasicBlock>(Header->getPredecessors()[1]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
if (isa<VPWidenPointerInductionRecipe>(&R) || | ||
isa<VPWidenIntOrFpInductionRecipe>(&R)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (isa<VPWidenPointerInductionRecipe>(&R) || | |
isa<VPWidenIntOrFpInductionRecipe>(&R)) { | |
if (isa<VPWidenPointerInductionRecipe,VPWidenIntOrFpInductionRecipe>(&R)) { |
(isa<VPReductionPHIRecipe>(PhiR) && | ||
cast<VPReductionPHIRecipe>(PhiR)->isInLoop()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use dyn_cast instead of isa/cast
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dyn_cast would need a separate assignment I think? Left as is for now, as this just moves the existing code
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It doesn't even move the existing code, merely indents it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Changes should be gone now, thanks
/// produce scalar header phis, including VPCanonicalIVPHIRecipe and | ||
/// VPEVLBasedIVPHIRecipe. | ||
class VPScalarPHIRecipe : public VPHeaderPHIRecipe { | ||
std::string Name; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
std::string Name; | |
SmallString Name; |
@@ -3543,6 +3585,8 @@ class VPBasicBlock : public VPBlockBase { | |||
return NewBlock; | |||
} | |||
|
|||
bool isHeader() { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool isHeader() { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); } | |
bool isHeader() const { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Marked as const now, thanks
VPBasicBlock *getVectorPreheader() { | ||
return cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()); | ||
} | ||
VPBasicBlock *getVectorPreheader() { return cast<VPBasicBlock>(getEntry()); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPBasicBlock *getVectorPreheader() { return cast<VPBasicBlock>(getEntry()); } | |
VPBasicBlock *getVectorPreheader() const { return cast<VPBasicBlock>(getEntry()); } |
7f1ef83
to
d31b3f6
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just pushed a major update of the patch, as most pending improvements needed for this have now landed.
There are ~8 tests still failing/needing updating, but the general changes should be good to review
@@ -425,13 +425,17 @@ void VPBasicBlock::connectToPredecessors(VPTransformState::CFGState &CFG) { | |||
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); | |||
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); | |||
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated to use lookup
, thanks~
VPBasicBlock *LatchVPBB = | ||
cast<VPBasicBlock>(Header->getPredecessors()[1]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
(isa<VPReductionPHIRecipe>(PhiR) && | ||
cast<VPReductionPHIRecipe>(PhiR)->isInLoop()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
dyn_cast would need a separate assignment I think? Left as is for now, as this just moves the existing code
@@ -3543,6 +3585,8 @@ class VPBasicBlock : public VPBlockBase { | |||
return NewBlock; | |||
} | |||
|
|||
bool isHeader() { return any_of(phis(), IsaPred<VPHeaderPHIRecipe>); } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Marked as const now, thanks
d31b3f6
to
1c6fc7a
Compare
Currently should be NFC, but will be used by #117506.
…torization. Currently should be NFC, but will be used by llvm/llvm-project#117506.
Currently should be NFC, but will be used by llvm#117506.
Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when disolving the vector region (llvm#117506) and improves codegen slightly in some cases.
Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when disolving the vector region (llvm#117506) and improves codegen slightly in some cases.
1c6fc7a
to
6894aa5
Compare
6894aa5
to
34b096d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ping :)
Rebased after 9a26b29, now all relevant tests are updated
…lvm#137883) Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when disolving the vector region (llvm#117506) and improves codegen slightly in some cases. PR: llvm#137883
…lvm#137883) Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when disolving the vector region (llvm#117506) and improves codegen slightly in some cases. PR: llvm#137883
…lvm#137883) Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when disolving the vector region (llvm#117506) and improves codegen slightly in some cases. PR: llvm#137883
…ization. (#137883) Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when disolving the vector region (llvm/llvm-project#117506) and improves codegen slightly in some cases. PR: llvm/llvm-project#137883
…lvm#137883) Generalize isUniformAfterVectorization check to not rely on the region, but purely work on checking operands and opcodes. This will be needed when disolving the vector region (llvm#117506) and improves codegen slightly in some cases. PR: llvm#137883
!fixup update more tests.
34b096d
to
c274eea
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ping :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Raises some thoughts...
VPB->setParent(nullptr); | ||
|
||
VPBlockUtils::connectBlocks(Preheader, Header); | ||
VPBlockUtils::connectBlocks(Exiting, Middle); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the backedge be added too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, moved here, thanks!
auto *Plan = DefVPBB->getPlan(); | ||
if (Plan->getVectorLoopRegion()) | ||
return !DefR->getParent()->getEnclosingLoopRegion(); | ||
return DefVPBB == Plan->getEntry(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The only out-of-loop place that values can be defined in when regions are gone - is in Plan's entry block?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All relevant hoisting should already have happened before disolving the regions; updated to only check non-live-ins if the top-level loop region still exists.
vp_depth_first_shallow(State.Plan->getEntry()), | ||
[&State](VPBlockBase *VPB, bool) { | ||
auto *VPBB = dyn_cast<VPBasicBlock>(VPB); | ||
return VPBB && VPBB->isHeader(State.VPDT) ? VPBB : nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At this point when IRBB's have been created for VPBB's and LI is maintained for the former, can lookup VPBB2IRBB for each VPBB and then check if LI->isLoopHeader(), before retrieving LI->getLoopFor().
return; | ||
|
||
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); | ||
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); | ||
BasicBlock *HeaderBB = State.CFG.VPBB2IRBB[HeaderVPBB]; | ||
|
||
// Remove redundant induction instructions. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could cse() be applied as a VPlan2VPlan transformation rather than to the generated IR below? Possibly even before dismantling regions, when header blocks are easier to find.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, this should also be moved to a VPlan-transform. The limitation to the loop region isn't really material for the VPlan version.
@@ -2874,7 +2870,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { | |||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could setProfileInfoAfterUnrolling() be applied to the branch recipe as part of its VPIRMetadata, rather than above to the branch instruction it generates?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, but it would also need adding metadata to the branch in the scalar loop.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which calls for expanding VPlan's scope...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, although it will require modifying the original scalar loop in VPlan, which probably needs more thought what kind of modifications we need. Maybe adding metadata would be sufficient?
// Don't apply optimizations below when no vector loop remains, as they all | ||
// require one at the moment. | ||
VPBasicBlock *HeaderVPBB = | ||
getHeaderForMainVectorLoop(*State.Plan, State.VPDT); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that here we could ask if LI->isHeader() instead as we'll be calling LI->getLoopFor() next, i.e., rely on the IRBB's generated from VPBB's and their mappings in VPBB2IRBB and LI.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could, although it wouldnt' be much simpler? I left it as is for now
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() || | ||
DefR->getParent()->getEnclosingLoopRegion()); | ||
} | ||
|
||
bool VPValue::isDefinedOutsideLoopRegions() const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool VPValue::isDefinedOutsideLoopRegions() const { | |
bool VPValue::isDefinedOutsideLoop() const { |
whether loop is represented as a region or not?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
@@ -583,11 +583,10 @@ Value *VPInstruction::generate(VPTransformState &State) { | |||
CondBr->setSuccessor(0, nullptr); | |||
Builder.GetInsertBlock()->getTerminator()->eraseFromParent(); | |||
|
|||
if (!getParent()->isExiting()) | |||
VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Comment above:
"hooking it up to backward destination for exiting blocks now"
"hooking it up to backward destination (header) for latch blocks now"
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
auto *Plan = getParent()->getPlan(); | ||
VPRegionBlock *TopRegion = Plan->getVectorLoopRegion(); | ||
VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock(); | ||
VPBasicBlock *Header = cast<VPBasicBlock>(getParent()->getSuccessors()[1]); | ||
|
||
// Replace the temporary unreachable terminator with a new conditional | ||
// branch, hooking it up to backward destination (the header) now and to the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(as commented here)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated thanks
if (PredVPSuccessors.size() == 2) | ||
continue; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add comment explaining this newly ignored case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Check not needed for the latest version, removed thanks.
@@ -2874,7 +2870,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { | |||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which calls for expanding VPlan's scope...
@@ -7859,6 +7855,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |||
BestVPlan, BestVF, | |||
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)); | |||
VPlanTransforms::removeDeadRecipes(BestVPlan); | |||
|
|||
VPBasicBlock *MiddleVPBB = | |||
BestVPlan.getVectorLoopRegion() ? BestVPlan.getMiddleBlock() : nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, right. Deserves a comment.
Also worth noting phase ordering aspects of disolveLoopRegions() - intentionally after optimizeForVFAndUF()?
auto *VPBB = dyn_cast<VPBasicBlock>(VPB); | ||
if (!VPBB) | ||
return false; | ||
if (auto *R = VPBB->getParent()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (auto *R = VPBB->getParent()) | |
// If VPBB is in a region R, VPBB is a loop header if R is a loop region with VPBB as its entry, i.e., free of predecessors. | |
if (auto *R = VPBB->getParent()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
if (auto *R = VPBB->getParent()) | ||
return !R->isReplicator() && VPBB->getNumPredecessors() == 0; | ||
|
||
assert(!VPB->getParent() && "checking blocks in regions not implemented yet"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Implemented above?
assert(!VPB->getParent() && "checking blocks in regions not implemented yet"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, removed, thanks
return !R->isReplicator() && VPBB->getNumPredecessors() == 0; | ||
|
||
assert(!VPB->getParent() && "checking blocks in regions not implemented yet"); | ||
return VPB->getPredecessors().size() == 2 && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return VPB->getPredecessors().size() == 2 && | |
// A header dominates its second predecessor (the latch), with the other predecessor being the preheader. | |
return VPB->getPredecessors().size() == 2 && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks!
@@ -514,6 +546,11 @@ void VPBasicBlock::execute(VPTransformState *State) { | |||
|
|||
// 2. Fill the IR basic block with IR instructions. | |||
executeRecipes(State, NewBB); | |||
|
|||
// If this block is a latch, update CurrentParentLoop. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// If this block is a latch, update CurrentParentLoop. | |
// If this block is a latch, update CurrentParentLoop. The second successor of a latch is a header, with the other successor being an exit/middle. |
Wrapping in VPBlockUtils::isLatch()
may be clearer?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
// For non-live-ins, check if is in a region only if the top-level loop region | ||
// still exits. | ||
const VPBasicBlock *DefVPBB = DefR->getParent(); | ||
auto *Plan = DefVPBB->getPlan(); | ||
return Plan->getVectorLoopRegion() && !DefVPBB->getEnclosingLoopRegion(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How/Does this differ from the existing isDefinedOutsideLoopRegions()
, if it still depends on (loop) regions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The previousisDefinedOutsideLoopRegions
would return true if there are no loop regions (because the only possible case was when it had been removed). Now we need to be more conservative, because the regions will be removed, but loops still remain.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
De Morgan'izing previous isDefinedOutsideLoopRegions()
:
!(DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion()))
==
!DefR || !(!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion())
==
!DefR || (DefR->getParent()->getPlan()->getVectorLoopRegion() &&
!DefR->getParent()->getEnclosingLoopRegion())
which appears to be what the current isDefinedOutsideLoop()
is doing?
This is already conservative in the sense that answering false means it may be defined inside loops - those that remain after loop regions are removed? Perhaps more accurately called mustBeDefinedOutsideLoops()
, instead of negating isDefinedInsideLoopRegions()
which is precisely documented to return "true if there is a vector loop region and \p VPV is defined in a loop region".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes, messed something up on my side when trying to restore, should be done now. Still kept the new name isDefinedOutsideLoop
.
I've not adjusted the name of isDefinedInsideLoopRegions
yet, as it's code is now not touched in the PR. can do separately or pull in here.
// branch, hooking it up to backward destination for exiting blocks now and | ||
// to forward destination(s) later when they are created. | ||
// branch, hooking it up to backward destination (header) for latch blocks | ||
// now to forward destination(s) later when they are created. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// now to forward destination(s) later when they are created. | |
// now, and to forward destination(s) later when they are created. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
doen thanks
// Note that CreateCondBr expects a valid BB as first argument, so we need | ||
// to set it to nullptr later. | ||
// branch, hooking it up to backward destination (the header) for latch | ||
// blocks now forward destination (the exit/middle block) later when it is |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// blocks now forward destination (the exit/middle block) later when it is | |
// blocks now, and to forward destination (the exit/middle block) later when it is |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
@@ -1124,10 +1121,6 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent, | |||
|
|||
void VPPhi::execute(VPTransformState &State) { | |||
State.setDebugLocFrom(getDebugLoc()); | |||
assert(getParent() == | |||
getParent()->getPlan()->getVectorLoopRegion()->getEntry() && | |||
"VPInstructions with PHI opcodes must be used for header phis only " |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah, ok, the assert is essentially still valid, but harder to detect - if VPPhi is in a header or not.
|
||
bool VPBlockUtils::isLatch(const VPBlockBase *VPB, | ||
const VPDominatorTree &VPDT) { | ||
return VPB->getNumSuccessors() == 2 && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
return VPB->getNumSuccessors() == 2 && | |
// A latch has a header as its second successor, with its other successor leaving the loop. | |
// A preheader OTOH has a header as its first (and only) successor. | |
return VPB->getNumSuccessors() == 2 && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
updated thanks
assert(PredBB && "Predecessor basic-block not found building successor."); | ||
assert(CFG.VPBB2IRBB.contains(PredVPBB) && | ||
"Predecessor basic-block not found building successor."); | ||
BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can retrain
BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB); | |
BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Restored, thanks
// branch, hooking it up to backward destination for exiting blocks now and | ||
// to forward destination(s) later when they are created. | ||
// branch, hooking it up to backward destination (header) for latch blocks | ||
// now, and to forward destination(s) later when they are created. | ||
BranchInst *CondBr = | ||
Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr); | ||
CondBr->setSuccessor(0, nullptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CondBr->setSuccessor(0, nullptr); | |
// First successor is always forward. | |
CondBr->setSuccessor(0, nullptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doen thanks
// branch, hooking it up to backward destination for exiting blocks now and | ||
// to forward destination(s) later when they are created. | ||
// branch, hooking it up to backward destination (header) for latch blocks | ||
// now, and to forward destination(s) later when they are created. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this be more consistent with BranchOnCount below, as in
// now, and to forward destination(s) later when they are created. | |
// now, and to forward destination(s) later when they are created. | |
// Second successor may be backwards - iff it is already in VPBB2IRBB. | |
VPBasicBlock *SecondVPSucc = cast<VPBasicBlock>(getParent()->getSuccessors()[1]); | |
BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc); | |
// Note that CreateCondBr expects a valid BB as first argument, but first successor is always forwards, so we reset it to nullptr. | |
BranchInst *CondBr = | |
Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), SecondIRSucc); | |
CondBr->setSuccessor(0, nullptr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done and moved to a helper used for both, thanks
// branch, hooking it up to backward destination (the header) for latch | ||
// blocks now, and to forward destination (the exit/middle block) later when | ||
// it is created. Note that CreateCondBr expects a valid BB as first | ||
// argument, so we need to set it to nullptr later. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// argument, so we need to set it to nullptr later. | |
// argument, but first successor is forwards, so we reset it to nullptr. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be adjusted in the unified version, thanks
// For non-live-ins, check if is in a region only if the top-level loop region | ||
// still exits. | ||
const VPBasicBlock *DefVPBB = DefR->getParent(); | ||
auto *Plan = DefVPBB->getPlan(); | ||
return Plan->getVectorLoopRegion() && !DefVPBB->getEnclosingLoopRegion(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
De Morgan'izing previous isDefinedOutsideLoopRegions()
:
!(DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion()))
==
!DefR || !(!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion())
==
!DefR || (DefR->getParent()->getPlan()->getVectorLoopRegion() &&
!DefR->getParent()->getEnclosingLoopRegion())
which appears to be what the current isDefinedOutsideLoop()
is doing?
This is already conservative in the sense that answering false means it may be defined inside loops - those that remain after loop regions are removed? Perhaps more accurately called mustBeDefinedOutsideLoops()
, instead of negating isDefinedInsideLoopRegions()
which is precisely documented to return "true if there is a vector loop region and \p VPV is defined in a loop region".
Building on top of #114305,
replace VPRegionBlocks with explicit CFG before executing.
This will enable further simplifications of phi handling during execution
and transformations that do not have to preserve the canonical IV required
by loop regions. This for example could include replacing the canonical
IV with an EVL based phi while completely removing the original canonical
IV.