@@ -208,6 +208,11 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
   return Parent->getEnclosingBlockWithPredecessors();
 }
 
+bool VPBasicBlock::isHeader(const VPDominatorTree &VPDT) const {
+  return getPredecessors().size() == 2 &&
+         VPDT.dominates(this, getPredecessors()[1]);
+}
+
 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
   iterator It = begin();
   while (It != end() && It->isPhi())
@@ -352,8 +357,7 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) {
 }
 
 BasicBlock *VPTransformState::CFGState::getPreheaderBBFor(VPRecipeBase *R) {
-  VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion();
-  return VPBB2IRBB[LoopRegion->getPreheaderVPBB()];
+  return VPBB2IRBB[cast<VPBasicBlock>(R->getParent()->getPredecessors()[0])];
 }
 
 void VPTransformState::addNewMetadata(Instruction *To,
@@ -436,14 +440,18 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
     VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
     auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors();
-    BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
+    BasicBlock *PredBB = CFG.VPBB2IRBB.lookup(PredVPBB);
+    if (!PredBB)
+      continue;
 
     assert(PredBB && "Predecessor basic-block not found building successor.");
     auto *PredBBTerminator = PredBB->getTerminator();
     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
 
     auto *TermBr = dyn_cast<BranchInst>(PredBBTerminator);
     if (isa<UnreachableInst>(PredBBTerminator)) {
+      if (PredVPSuccessors.size() == 2)
+        continue;
       assert(PredVPSuccessors.size() == 1 &&
              "Predecessor ending w/o branch must have single successor.");
       DebugLoc DL = PredBBTerminator->getDebugLoc();
@@ -499,11 +507,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
   bool Replica = bool(State->Lane);
   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
 
+  if (isHeader(State->VPDT)) {
+    // Create and register the new vector loop.
+    Loop *PrevParentLoop = State->CurrentParentLoop;
+    State->CurrentParentLoop = State->LI->AllocateLoop();
+
+    // Insert the new loop into the loop nest and register the new basic blocks
+    // before calling any utilities such as SCEV that require valid LoopInfo.
+    if (PrevParentLoop)
+      PrevParentLoop->addChildLoop(State->CurrentParentLoop);
+    else
+      State->LI->addTopLevelLoop(State->CurrentParentLoop);
+  }
+
   auto IsReplicateRegion = [](VPBlockBase *BB) {
     auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
-    return R && R->isReplicator();
+    assert((!R || R->isReplicator()) &&
+           "only replicate region blocks should remain");
+    return R;
   };
-
   // 1. Create an IR basic block.
   if ((Replica && this == getParent()->getEntry()) ||
       IsReplicateRegion(getSingleHierarchicalPredecessor())) {
@@ -537,6 +559,14 @@ void VPBasicBlock::execute(VPTransformState *State) {
 
   // 2. Fill the IR basic block with IR instructions.
   executeRecipes(State, NewBB);
+
+  // If this block is a latch, update CurrentParentLoop.
+  if (any_of(getSuccessors(), [State, this](VPBlockBase *Succ) {
+        auto *VPBB = dyn_cast<VPBasicBlock>(Succ);
+        return VPBB && VPBB->isHeader(State->VPDT) &&
+               State->VPDT.dominates(Succ, this);
+      }))
+    State->CurrentParentLoop = State->CurrentParentLoop->getParentLoop();
 }
 
 VPBasicBlock *VPBasicBlock::clone() {
@@ -747,35 +777,13 @@ VPRegionBlock *VPRegionBlock::clone() {
 }
 
 void VPRegionBlock::execute(VPTransformState *State) {
-  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
-      RPOT(Entry);
-
-  if (!isReplicator()) {
-    // Create and register the new vector loop.
-    Loop *PrevParentLoop = State->CurrentParentLoop;
-    State->CurrentParentLoop = State->LI->AllocateLoop();
-
-    // Insert the new loop into the loop nest and register the new basic blocks
-    // before calling any utilities such as SCEV that require valid LoopInfo.
-    if (PrevParentLoop)
-      PrevParentLoop->addChildLoop(State->CurrentParentLoop);
-    else
-      State->LI->addTopLevelLoop(State->CurrentParentLoop);
-
-    // Visit the VPBlocks connected to "this", starting from it.
-    for (VPBlockBase *Block : RPOT) {
-      LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
-      Block->execute(State);
-    }
-
-    State->CurrentParentLoop = PrevParentLoop;
-    return;
-  }
-
+  assert(isReplicator() &&
+         "Loop regions should have been lowered to plain CFG");
   assert(!State->Lane && "Replicating a Region with non-null instance.");
-
-  // Enter replicating mode.
   assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
+
+  ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
+      Entry);
   State->Lane = VPLane(0);
   for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
        ++Lane) {
@@ -850,6 +858,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
 }
 #endif
 
+void VPRegionBlock::removeRegion() {
+  auto *Header = cast<VPBasicBlock>(getEntry());
+  VPBlockBase *Preheader = getSinglePredecessor();
+  auto *Exiting = cast<VPBasicBlock>(getExiting());
+
+  VPBlockBase *Middle = getSingleSuccessor();
+  VPBlockUtils::disconnectBlocks(Preheader, this);
+  VPBlockUtils::disconnectBlocks(this, Middle);
+
+  for (VPBlockBase *VPB : vp_depth_first_shallow(Entry))
+    VPB->setParent(nullptr);
+
+  VPBlockUtils::connectBlocks(Preheader, Header);
+  VPBlockUtils::connectBlocks(Exiting, Middle);
+}
+
 VPlan::VPlan(Loop *L) {
   setEntry(createVPIRBasicBlock(L->getLoopPreheader()));
   ScalarHeader = createVPIRBasicBlock(L->getHeader());
@@ -961,57 +985,57 @@ void VPlan::execute(VPTransformState *State) {
   for (VPBlockBase *Block : RPOT)
     Block->execute(State);
 
-  State->CFG.DTU.flush();
-
-  auto *LoopRegion = getVectorLoopRegion();
-  if (!LoopRegion)
-    return;
-
-  VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
-  BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
-
   // Fix the latch value of canonical, reduction and first-order recurrences
   // phis in the vector loop.
-  VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
-  for (VPRecipeBase &R : Header->phis()) {
-    // Skip phi-like recipes that generate their backedege values themselves.
-    if (isa<VPWidenPHIRecipe>(&R))
+  for (VPBasicBlock *Header :
+       VPBlockUtils::blocksOnly<VPBasicBlock>(vp_depth_first_shallow(Entry))) {
+    if (!Header->isHeader(State->VPDT))
       continue;
+    for (VPRecipeBase &R : Header->phis()) {
+      if (isa<VPWidenPHIRecipe>(&R))
+        continue;
 
-    if (isa<VPWidenInductionRecipe>(&R)) {
-      PHINode *Phi = nullptr;
-      if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
-        Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
-      } else {
-        auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
-        assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
-               "recipe generating only scalars should have been replaced");
-        auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
-        Phi = cast<PHINode>(GEP->getPointerOperand());
+      auto *LatchVPBB = cast<VPBasicBlock>(Header->getPredecessors()[1]);
+      BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB];
+
+      if (isa<VPWidenInductionRecipe>(&R)) {
+        PHINode *Phi = nullptr;
+        if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
+          Phi = cast<PHINode>(State->get(R.getVPSingleValue()));
+        } else {
+          auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
+          assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) &&
+                 "recipe generating only scalars should have been replaced");
+          auto *GEP = cast<GetElementPtrInst>(State->get(WidenPhi));
+          Phi = cast<PHINode>(GEP->getPointerOperand());
+        }
+
+        Phi->setIncomingBlock(1, VectorLatchBB);
+
+        // Move the last step to the end of the latch block. This ensures
+        // consistent placement of all induction updates.
+        Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
+        Inc->moveBefore(
+            std::prev(VectorLatchBB->getTerminator()->getIterator()));
+
+        // Use the steps for the last part as backedge value for the induction.
+        if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
+          Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
+        continue;
       }
 
-      Phi->setIncomingBlock(1, VectorLatchBB);
-
-      // Move the last step to the end of the latch block. This ensures
-      // consistent placement of all induction updates.
-      Instruction *Inc = cast<Instruction>(Phi->getIncomingValue(1));
-      Inc->moveBefore(std::prev(VectorLatchBB->getTerminator()->getIterator()));
-
-      // Use the steps for the last part as backedge value for the induction.
-      if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R))
-        Inc->setOperand(0, State->get(IV->getLastUnrolledPartOperand()));
-      continue;
+      auto *PhiR = cast<VPSingleDefRecipe>(&R);
+      // VPInstructions currently model scalar Phis only.
+      bool NeedsScalar = isa<VPInstruction>(PhiR) ||
+                         (isa<VPReductionPHIRecipe>(PhiR) &&
+                          cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
+
+      Value *Phi = State->get(PhiR, NeedsScalar);
+      // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
+      // not.
+      Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
+      cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
     }
-
-    auto *PhiR = cast<VPSingleDefRecipe>(&R);
-    // VPInstructions currently model scalar Phis only.
-    bool NeedsScalar = isa<VPInstruction>(PhiR) ||
-                       (isa<VPReductionPHIRecipe>(PhiR) &&
-                        cast<VPReductionPHIRecipe>(PhiR)->isInLoop());
-    Value *Phi = State->get(PhiR, NeedsScalar);
-    // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
-    Value *Val = State->get(PhiR->getOperand(1), NeedsScalar);
-    cast<PHINode>(Phi)->addIncoming(Val, VectorLatchBB);
   }
 }
 
@@ -1363,16 +1387,16 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
 }
 #endif
 
-/// Returns true if there is a vector loop region and \p VPV is defined in a
-/// loop region.
-static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
-  const VPRecipeBase *DefR = VPV->getDefiningRecipe();
-  return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
-                  DefR->getParent()->getEnclosingLoopRegion());
-}
-
 bool VPValue::isDefinedOutsideLoopRegions() const {
-  return !isDefinedInsideLoopRegions(this);
+  auto *DefR = getDefiningRecipe();
+  if (!DefR)
+    return true;
+
+  const VPBasicBlock *DefVPBB = DefR->getParent();
+  auto *Plan = DefVPBB->getPlan();
+  if (Plan->getVectorLoopRegion())
+    return !DefR->getParent()->getEnclosingLoopRegion();
+  return DefVPBB == Plan->getEntry();
 }
 
 void VPValue::replaceAllUsesWith(VPValue *New) {
   replaceUsesWithIf(New, [](VPUser &, unsigned) { return true; });