diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5244a5e7b1c41..9899679691f73 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9055,14 +9055,14 @@ static void addScalarResumePhis(VPRecipeBuilder &Builder, VPlan &Plan, VPValue *OneVPV = Plan.getOrAddLiveIn( ConstantInt::get(Plan.getCanonicalIV()->getScalarType(), 1)); for (VPRecipeBase &ScalarPhiR : *Plan.getScalarHeader()) { - auto *ScalarPhiIRI = cast(&ScalarPhiR); - auto *ScalarPhiI = dyn_cast(&ScalarPhiIRI->getInstruction()); - if (!ScalarPhiI) + auto *ScalarPhiIRI = dyn_cast(&ScalarPhiR); + if (!ScalarPhiIRI) break; // TODO: Extract final value from induction recipe initially, optimize to // pre-computed end value together in optimizeInductionExitUsers. - auto *VectorPhiR = cast(Builder.getRecipe(ScalarPhiI)); + auto *VectorPhiR = + cast(Builder.getRecipe(&ScalarPhiIRI->getIRPhi())); if (auto *WideIVR = dyn_cast(VectorPhiR)) { if (VPInstruction *ResumePhi = addResumePhiRecipeForInduction( WideIVR, VectorPHBuilder, ScalarPHBuilder, TypeInfo, @@ -9112,11 +9112,8 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, continue; for (VPRecipeBase &R : *ExitVPBB) { - auto *ExitIRI = dyn_cast(&R); + auto *ExitIRI = dyn_cast(&R); if (!ExitIRI) - continue; - auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); - if (!ExitPhi) break; if (ExitVPBB->getSinglePredecessor() != Plan.getMiddleBlock()) { assert(ExitIRI->getNumOperands() == @@ -9124,8 +9121,10 @@ collectUsersInExitBlocks(Loop *OrigLoop, VPRecipeBuilder &Builder, "early-exit must update exit values on construction"); continue; } + + PHINode &ExitPhi = ExitIRI->getIRPhi(); BasicBlock *ExitingBB = OrigLoop->getLoopLatch(); - Value *IncomingValue = ExitPhi->getIncomingValueForBlock(ExitingBB); + Value *IncomingValue = ExitPhi.getIncomingValueForBlock(ExitingBB); VPValue *V = 
Builder.getVPValueOrAddLiveIn(IncomingValue); ExitIRI->addOperand(V); if (V->isLiveIn()) @@ -10318,11 +10317,10 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { cast(R.getVPSingleValue()->getUnderlyingValue())); } for (VPRecipeBase &R : make_early_inc_range(*MainPlan.getScalarHeader())) { - auto *VPIRInst = cast(&R); - auto *IRI = dyn_cast(&VPIRInst->getInstruction()); - if (!IRI) + auto *VPIRInst = dyn_cast(&R); + if (!VPIRInst) break; - if (EpiWidenedPhis.contains(IRI)) + if (EpiWidenedPhis.contains(&VPIRInst->getIRPhi())) continue; // There is no corresponding wide induction in the epilogue plan that would // need a resume value. Remove the VPIRInst wrapping the scalar header phi diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index f6293fa19b7db..8b53c559f6533 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1225,7 +1225,7 @@ VPIRBasicBlock *VPlan::createVPIRBasicBlock(BasicBlock *IRBB) { auto *VPIRBB = createEmptyVPIRBasicBlock(IRBB); for (Instruction &I : make_range(IRBB->begin(), IRBB->getTerminator()->getIterator())) - VPIRBB->appendRecipe(new VPIRInstruction(I)); + VPIRBB->appendRecipe(VPIRInstruction::create(I)); return VPIRBB; } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4fc382674f096..1597080fa0c58 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1026,22 +1026,28 @@ class VPInstruction : public VPRecipeWithIRFlags, }; /// A recipe to wrap on original IR instruction not to be modified during -/// execution, execept for PHIs. For PHIs, a single VPValue operand is allowed, -/// and it is used to add a new incoming value for the single predecessor VPBB. +/// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass. /// Expect PHIs, VPIRInstructions cannot have any operands. 
class VPIRInstruction : public VPRecipeBase { Instruction &I; -public: +protected: + /// VPIRInstruction::create() should be used to create VPIRInstructions, as + /// subclasses may need to be created, e.g. VPIRPhi. VPIRInstruction(Instruction &I) : VPRecipeBase(VPDef::VPIRInstructionSC, ArrayRef()), I(I) {} +public: ~VPIRInstruction() override = default; + /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a + /// VPIRInstruction. + static VPIRInstruction *create(Instruction &I); + VP_CLASSOF_IMPL(VPDef::VPIRInstructionSC) VPIRInstruction *clone() override { - auto *R = new VPIRInstruction(I); + auto *R = create(I); for (auto *Op : operands()) R->addOperand(Op); return R; @@ -1085,6 +1091,29 @@ class VPIRInstruction : public VPRecipeBase { void extractLastLaneOfOperand(VPBuilder &Builder); }; +/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of +/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is +/// allowed, and it is used to add a new incoming value for the single +/// predecessor VPBB. +struct VPIRPhi : public VPIRInstruction { + VPIRPhi(PHINode &PN) : VPIRInstruction(PN) {} + + static inline bool classof(const VPRecipeBase *U) { + auto *R = dyn_cast(U); + return R && isa(R->getInstruction()); + } + + PHINode &getIRPhi() { return cast(getInstruction()); } + + void execute(VPTransformState &State) override; + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// VPWidenRecipe is a recipe for producing a widened instruction using the /// opcode and operands of the recipe. 
This recipe covers most of the /// traditional vectorization cases where each recipe transforms into a diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index efa238228f6c3..e66002f7f0bfd 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1061,30 +1061,15 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, } #endif -void VPIRInstruction::execute(VPTransformState &State) { - assert((isa(&I) || getNumOperands() == 0) && - "Only PHINodes can have extra operands"); - for (const auto &[Idx, Op] : enumerate(operands())) { - VPValue *ExitValue = Op; - auto Lane = vputils::isUniformAfterVectorization(ExitValue) - ? VPLane::getFirstLane() - : VPLane::getLastLaneForVF(State.VF); - VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; - auto *PredVPBB = Pred->getExitingBasicBlock(); - BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; - // Set insertion point in PredBB in case an extract needs to be generated. - // TODO: Model extracts explicitly. - State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); - Value *V = State.get(ExitValue, VPLane(Lane)); - auto *Phi = cast(&I); - // If there is no existing block for PredBB in the phi, add a new incoming - // value. Otherwise update the existing incoming value for PredBB. - if (Phi->getBasicBlockIndex(PredBB) == -1) - Phi->addIncoming(V, PredBB); - else - Phi->setIncomingValueForBlock(PredBB, V); - } +VPIRInstruction *VPIRInstruction ::create(Instruction &I) { + if (auto *Phi = dyn_cast(&I)) + return new VPIRPhi(*Phi); + return new VPIRInstruction(I); +} +void VPIRInstruction::execute(VPTransformState &State) { + assert(!isa(this) && getNumOperands() == 0 && + "PHINodes must be handled by VPIRPhi"); // Advance the insert point after the wrapped IR instruction. This allows // interleaving VPIRInstructions and other recipes. 
State.Builder.SetInsertPoint(I.getParent(), std::next(I.getIterator())); @@ -1117,6 +1102,40 @@ void VPIRInstruction::extractLastLaneOfOperand(VPBuilder &Builder) { void VPIRInstruction::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { O << Indent << "IR " << I; +} +#endif + +void VPIRPhi::execute(VPTransformState &State) { + PHINode *Phi = &getIRPhi(); + for (const auto &[Idx, Op] : enumerate(operands())) { + VPValue *ExitValue = Op; + auto Lane = vputils::isUniformAfterVectorization(ExitValue) + ? VPLane::getFirstLane() + : VPLane::getLastLaneForVF(State.VF); + VPBlockBase *Pred = getParent()->getPredecessors()[Idx]; + auto *PredVPBB = Pred->getExitingBasicBlock(); + BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB]; + // Set insertion point in PredBB in case an extract needs to be generated. + // TODO: Model extracts explicitly. + State.Builder.SetInsertPoint(PredBB, PredBB->getFirstNonPHIIt()); + Value *V = State.get(ExitValue, VPLane(Lane)); + // If there is no existing block for PredBB in the phi, add a new incoming + // value. Otherwise update the existing incoming value for PredBB. + if (Phi->getBasicBlockIndex(PredBB) == -1) + Phi->addIncoming(V, PredBB); + else + Phi->setIncomingValueForBlock(PredBB, V); + } + + // Advance the insert point after the wrapped IR instruction. This allows + // interleaving VPIRInstructions and other recipes. + State.Builder.SetInsertPoint(Phi->getParent(), std::next(Phi->getIterator())); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPIRPhi::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + VPIRInstruction::print(O, Indent, SlotTracker); if (getNumOperands() != 0) { O << " (extra operand" << (getNumOperands() > 1 ? 
"s" : "") << ": "; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b77ae8e54c78d..d949b230f5e17 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -798,8 +798,8 @@ void VPlanTransforms::optimizeInductionExitUsers( VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType()); for (VPIRBasicBlock *ExitVPBB : Plan.getExitBlocks()) { for (VPRecipeBase &R : *ExitVPBB) { - auto *ExitIRI = cast(&R); - if (!isa(ExitIRI->getInstruction())) + auto *ExitIRI = dyn_cast(&R); + if (!ExitIRI) break; for (auto [Idx, PredVPBB] : enumerate(ExitVPBB->getPredecessors())) { @@ -2155,20 +2155,20 @@ void VPlanTransforms::handleUncountableEarlyExit( VPBuilder MiddleBuilder(NewMiddle); VPBuilder EarlyExitB(VectorEarlyExitVPBB); for (VPRecipeBase &R : *VPEarlyExitBlock) { - auto *ExitIRI = cast(&R); - auto *ExitPhi = dyn_cast(&ExitIRI->getInstruction()); - if (!ExitPhi) + auto *ExitIRI = dyn_cast(&R); + if (!ExitIRI) break; + PHINode &ExitPhi = ExitIRI->getIRPhi(); VPValue *IncomingFromEarlyExit = RecipeBuilder.getVPValueOrAddLiveIn( - ExitPhi->getIncomingValueForBlock(UncountableExitingBlock)); + ExitPhi.getIncomingValueForBlock(UncountableExitingBlock)); if (OrigLoop->getUniqueExitBlock()) { // If there's a unique exit block, VPEarlyExitBlock has 2 predecessors // (MiddleVPBB and NewMiddle). Add the incoming value from MiddleVPBB // which is coming from the original latch. 
VPValue *IncomingFromLatch = RecipeBuilder.getVPValueOrAddLiveIn( - ExitPhi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); + ExitPhi.getIncomingValueForBlock(OrigLoop->getLoopLatch())); ExitIRI->addOperand(IncomingFromLatch); ExitIRI->extractLastLaneOfOperand(MiddleBuilder); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 6fe131879b1a2..f7fa659ba6a8a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -209,9 +209,8 @@ bool VPlanVerifier::verifyVPBasicBlock(const VPBasicBlock *VPBB) { auto *UI = cast(U); // TODO: check dominance of incoming values for phis properly. if (!UI || - isa(UI) || - (isa(UI) && - isa(cast(UI)->getInstruction())) || + isa(UI) || (isa(UI) && cast(UI)->getOpcode() == Instruction::PHI)) continue;