@@ -207,6 +207,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
   return Parent->getEnclosingBlockWithPredecessors();
 }
 
+bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
+  return getPredecessors().size() == 2 &&
+         VPDT.dominates(this, getPredecessors()[1]);
+}
+
 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
   iterator It = begin();
   while (It != end() && It->isPhi())
@@ -351,8 +356,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
 }
 
 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
-  VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
-  return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+  return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
 }
 
 void VPTransformState::addNewMetadata(Instruction *To,
@@ -437,14 +441,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
-    BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+    BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
+    if (!PredBB)
+      continue;
 
     assert(PredBB && "Predecessor basic-block not found building successor.");
     auto *PredBBTerminator = PredBB->getTerminator();
     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
 
     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
     if (isa<UnreachableInst>(PredBBTerminator)) {
+      if (PredVPSuccessors.size() == 2)
+        continue;
       assert(PredVPSuccessors.size() == 1 &&
              "Predecessor ending w/o branch must have single successor.");
       DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -500,11 +508,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
   bool Replica = bool(State->Lane);
   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
 
+  if (isHeader(State->VPDT)) {
+    // Create and register the new vector loop.
+    Loop *PrevParentLoop = State->CurrentParentLoop;
+    State->CurrentParentLoop = State->LI->AllocateLoop();
+
+    // Insert the new loop into the loop nest and register the new basic blocks
+    // before calling any utilities such as SCEV that require valid LoopInfo.
+    if (PrevParentLoop)
+      PrevParentLoop->addChildLoop(State->CurrentParentLoop);
+    else
+      State->LI->addTopLevelLoop(State->CurrentParentLoop);
+  }
+
   auto IsReplicateRegion = [](VPBlockBase *BB) {
     auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
-    return R && R->isReplicator();
+    assert((!R || R->isReplicator()) &&
+           "only replicate region blocks should remain");
+    return R;
   };
-
   // 1. Create an IR basic block.
   if ((Replica && this == getParent()->getEntry()) ||
       IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -527,6 +549,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
 
   // 2. Fill the IR basic block with IR instructions.
   executeRecipes(State, NewBB);
+
+  // If this block is a latch, update CurrentParentLoop.
+  if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
+        auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
+        return VPBB && VPBB->isHeader(State->VPDT) &&
+               State->VPDT.dominates(Succ, this);
+      }))
+    State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
 }
 
 VPBasicBlock *VPBasicBlock::clone() {
@@ -739,35 +769,13 @@ VPRegionBlock *VPRegionBlock::clone() {
 }
 
 void VPRegionBlock::execute(VPTransformState *State) {
-  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
-      RPOT(Entry);
-
-  if (!isReplicator()) {
-    // Create and register the new vector loop.
-    Loop *PrevParentLoop = State->CurrentParentLoop;
-    State->CurrentParentLoop = State->LI->AllocateLoop();
-
-    // Insert the new loop into the loop nest and register the new basic blocks
-    // before calling any utilities such as SCEV that require valid LoopInfo.
-    if (PrevParentLoop)
-      PrevParentLoop->addChildLoop(State->CurrentParentLoop);
-    else
-      State->LI->addTopLevelLoop(State->CurrentParentLoop);
-
-    // Visit the VPBlocks connected to "this", starting from it.
-    for (VPBlockBase *Block : RPOT) {
-      LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
-      Block->execute(State);
-    }
-
-    State->CurrentParentLoop = PrevParentLoop;
-    return;
-  }
-
+  assert(isReplicator() &&
+         "Loop regions should have been lowered to plain CFG");
   assert(!State->Lane && "Replicating a Region with non-null instance.");
-
-  // Enter replicating mode.
   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+
+  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+      Entry);
   State->Lane = VPLane(0);
   for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
        ++Lane) {
@@ -842,6 +850,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+void VPRegionBlock::removeRegion() {
+  auto *Header = cast<VPBasicBlock>(getEntry());
+  VPBlockBase *Preheader = getSinglePredecessor();
+  auto *Exiting = cast<VPBasicBlock>(getExiting());
+
+  VPBlockBase *Middle = getSingleSuccessor();
+  VPBlockUtils::disconnectBlocks(Preheader, this);
+  VPBlockUtils::disconnectBlocks(this, Middle);
+
+  for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
+    VPB->setParent(nullptr);
+
+  VPBlockUtils::connectBlocks(Preheader, Header);
+  VPBlockUtils::connectBlocks(Exiting, Middle);
+}
+
 VPlan::VPlan(Loop *L) {
   setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
   ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -951,57 +975,57 @@ void VPlan::execute(VPTransformState *State) {
   for (VPBlockBase *Block : RPOT)
     Block->execute(State);
 
-  State->CFG.DTU.flush();
-
-  auto *LoopRegion = getVectorLoopRegion();
-  if (!LoopRegion)
-    return;
-
-  VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
-  BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
   // Fix the latch value of canonical, reduction and first-order recurrences
   // phis in the vector loop.
-  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
-  for (VPRecipeBase &R : Header->phis()) {
-    // Skip phi-like recipes that generate their backedege values themselves.
-    if (isa<VPWidenPHIRecipe>(&R))
+  for (VPBasicBlock *Header :
+       VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
+    if (!Header->isHeader(State->VPDT))
       continue;
+    for (VPRecipeBase &R : Header->phis()) {
+      if (isa<VPWidenPHIRecipe>(&R))
+        continue;
 
-    if (isa<VPWidenInductionRecipe>(&R)) {
-      PHINode *Phi = nullptr;
-      if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
-        Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
-      } else {
-        auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
-        assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
-               "recipe generating only scalars should have been replaced");
-        auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
-        Phi = cast<PHINode>(GEP->getPointerOperand());
+      auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
+      BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
+
+      if (isa<VPWidenInductionRecipe>(&R)) {
+        PHINode *Phi = nullptr;
+        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+          Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
+        } else {
+          auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+          assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+                 "recipe generating only scalars should have been replaced");
+          auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+          Phi = cast<PHINode>(GEP->getPointerOperand());
+        }
+
+        Phi->setIncomingBlock(1, VectorLatchBB);
+
+        // Move the last step to the end of the latch block. This ensures
+        // consistent placement of all induction updates.
+        Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+        Inc->moveBefore(
+            std::prev(VectorLatchBB->getTerminator()->getIterator()));
+
+        // Use the steps for the last part as backedge value for the induction.
+        if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
+          Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
+        continue;
       }
 
-      Phi->setIncomingBlock(1, VectorLatchBB);
-
-      // Move the last step to the end of the latch block. This ensures
-      // consistent placement of all induction updates.
-      Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
-      Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
-
-      // Use the steps for the last part as backedge value for the induction.
-      if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
-        Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
-      continue;
+      auto *PhiR = cast<VPSingleDefRecipe>(&R);
+      // VPInstructions currently model scalar Phis only.
+      bool NeedsScalar = isa<VPInstruction>(PhiR) ||
+                         (isa<VPReductionPHIRecipe>(PhiR) &&
+                          cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+
+      Value *Phi = State->get(PhiR, NeedsScalar);
+      // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
+      // not.
+      Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
+      cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
     }
-
-    auto *PhiR = cast<VPSingleDefRecipe>(&R);
-    // VPInstructions currently model scalar Phis only.
-    bool NeedsScalar = isa<VPInstruction>(PhiR) ||
-                       (isa<VPReductionPHIRecipe>(PhiR) &&
-                        cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
-    Value *Phi = State->get(PhiR, NeedsScalar);
-    // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
-    Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
-    cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
   }
 }
 
@@ -1360,16 +1384,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
 
 #endif
 
-/// Returns true if there is a vector loop region and \p VPV is defined in a
-/// loop region.
-static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
-  const VPRecipeBase *DefR = VPV->getDefiningRecipe();
-  return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
-                  DefR->getParent()->getEnclosingLoopRegion());
-}
-
 bool VPValue::isDefinedOutsideLoopRegions() const {
-  return !isDefinedInsideLoopRegions(this);
+  auto *DefR = getDefiningRecipe();
+  if (!DefR)
+    return true;
+
+  const VPBasicBlock *DefVPBB = DefR->getParent();
+  auto *Plan = DefVPBB->getPlan();
+  if (Plan->getVectorLoopRegion())
+    return !DefR->getParent()->getEnclosingLoopRegion();
+  return DefVPBB == Plan->getEntry();
 }
 void VPValue::replaceAllUsesWith(VPValue *New) {
   replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });