-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU] Change CF intrinsics lowering to reconverge on predecessors. #92809
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ | |
#include "GCNSubtarget.h" | ||
#include "llvm/Analysis/LoopInfo.h" | ||
#include "llvm/Analysis/UniformityAnalysis.h" | ||
#include "llvm/Analysis/DomTreeUpdater.h" | ||
#include "llvm/CodeGen/TargetPassConfig.h" | ||
#include "llvm/IR/BasicBlock.h" | ||
#include "llvm/IR/Constants.h" | ||
|
@@ -53,7 +54,7 @@ class SIAnnotateControlFlow : public FunctionPass { | |
Function *Else; | ||
Function *IfBreak; | ||
Function *Loop; | ||
Function *EndCf; | ||
Function *WaveReconverge; | ||
|
||
DominatorTree *DT; | ||
StackVector Stack; | ||
|
@@ -86,7 +87,7 @@ class SIAnnotateControlFlow : public FunctionPass { | |
|
||
bool handleLoop(BranchInst *Term); | ||
|
||
bool closeControlFlow(BasicBlock *BB); | ||
bool tryWaveReconverge(BasicBlock *BB); | ||
|
||
public: | ||
static char ID; | ||
|
@@ -141,7 +142,7 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) { | |
IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break, | ||
{ IntMask }); | ||
Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask }); | ||
EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask }); | ||
WaveReconverge = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_wave_reconverge, { IntMask }); | ||
} | ||
|
||
/// Is the branch condition uniform or did the StructurizeCFG pass | ||
|
@@ -203,8 +204,6 @@ bool SIAnnotateControlFlow::eraseIfUnused(PHINode *Phi) { | |
|
||
/// Open a new "If" block | ||
bool SIAnnotateControlFlow::openIf(BranchInst *Term) { | ||
if (isUniform(Term)) | ||
return false; | ||
|
||
IRBuilder<> IRB(Term); | ||
Value *IfCall = IRB.CreateCall(If, {Term->getCondition()}); | ||
|
@@ -305,43 +304,43 @@ bool SIAnnotateControlFlow::handleLoop(BranchInst *Term) { | |
} | ||
|
||
/// Close the last opened control flow | ||
bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) { | ||
llvm::Loop *L = LI->getLoopFor(BB); | ||
bool SIAnnotateControlFlow::tryWaveReconverge(BasicBlock *BB) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This function is a core part of this change. It would be nice to have more comments, with examples, before the function header showing when and where wave_reconverge is inserted. |
||
|
||
assert(Stack.back().first == BB); | ||
if (succ_empty(BB)) | ||
return false; | ||
|
||
if (L && L->getHeader() == BB) { | ||
// We can't insert an EndCF call into a loop header, because it will | ||
// get executed on every iteration of the loop, when it should be | ||
// executed only once before the loop. | ||
SmallVector <BasicBlock *, 8> Latches; | ||
L->getLoopLatches(Latches); | ||
BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); | ||
if (Term->getNumSuccessors() == 1) { | ||
// The current BB's single successor is the top of the stack. We need to | ||
// reconverge over that path. | ||
BasicBlock *SingleSucc = *succ_begin(BB); | ||
BasicBlock::iterator InsPt = Term ? BasicBlock::iterator(Term) : BB->end(); | ||
|
||
SmallVector<BasicBlock *, 2> Preds; | ||
for (BasicBlock *Pred : predecessors(BB)) { | ||
if (!is_contained(Latches, Pred)) | ||
Preds.push_back(Pred); | ||
if (isTopOfStack(SingleSucc)) { | ||
Value *Exec = Stack.back().second; | ||
IRBuilder<>(BB, InsPt).CreateCall(WaveReconverge, {Exec}); | ||
} | ||
|
||
BB = SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, nullptr, | ||
false); | ||
} | ||
|
||
Value *Exec = popSaved(); | ||
BasicBlock::iterator FirstInsertionPt = BB->getFirstInsertionPt(); | ||
if (!isa<UndefValue>(Exec) && !isa<UnreachableInst>(FirstInsertionPt)) { | ||
Instruction *ExecDef = cast<Instruction>(Exec); | ||
BasicBlock *DefBB = ExecDef->getParent(); | ||
if (!DT->dominates(DefBB, BB)) { | ||
// Split edge to make Def dominate Use | ||
FirstInsertionPt = SplitEdge(DefBB, BB, DT, LI)->getFirstInsertionPt(); | ||
} else { | ||
// We have a uniform conditional branch terminating the block. | ||
// This block may be the last in the Then path of the enclosing divergent
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Typo 'THis' |
||
// IF. | ||
if (!isUniform(Term)) | ||
// Divergent loop is going to be further processed in another place | ||
return false; | ||
|
||
for (auto Succ : Term->successors()) { | ||
if (isTopOfStack(Succ)) { | ||
// Just split to make room for further WAVE_RECONVERGE insertion | ||
SmallVector<BasicBlock*, 2> Preds; | ||
for (auto P : predecessors(Succ)) { | ||
if (DT->dominates(BB, P)) | ||
Preds.push_back(P); | ||
} | ||
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); | ||
SplitBlockPredecessors(Succ, Preds, ".reconverge", &DTU, LI, | ||
nullptr, false); | ||
} | ||
} | ||
IRBuilder<> IRB(FirstInsertionPt->getParent(), FirstInsertionPt); | ||
// TODO: StructurizeCFG 'Flow' blocks have debug locations from the | ||
// condition, for now just avoid copying these DebugLocs so that stepping | ||
// out of the then/else block in a debugger doesn't step to the condition. | ||
IRB.SetCurrentDebugLocation(DebugLoc()); | ||
IRB.CreateCall(EndCf, {Exec}); | ||
} | ||
|
||
return true; | ||
|
@@ -365,14 +364,20 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { | |
|
||
if (!Term || Term->isUnconditional()) { | ||
if (isTopOfStack(BB)) | ||
Changed |= closeControlFlow(BB); | ||
Stack.pop_back(); | ||
|
||
Changed |= tryWaveReconverge(BB); | ||
|
||
continue; | ||
} | ||
|
||
if (I.nodeVisited(Term->getSuccessor(1))) { | ||
if (isTopOfStack(BB)) | ||
Changed |= closeControlFlow(BB); | ||
Stack.pop_back(); | ||
|
||
// Let's take care of uniform loop latch that may be closing the Then | ||
// path of the enclosing divergent branch. | ||
Changed |= tryWaveReconverge(BB); | ||
|
||
if (DT->dominates(Term->getSuccessor(1), BB)) | ||
Changed |= handleLoop(Term); | ||
|
@@ -387,10 +392,15 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) { | |
continue; | ||
} | ||
|
||
Changed |= closeControlFlow(BB); | ||
Stack.pop_back(); | ||
} | ||
|
||
Changed |= openIf(Term); | ||
if (isUniform(Term)) | ||
// Uniform conditional branch may be in the block that closes the Then | ||
// path of the divergent conditional branch. | ||
Changed |= tryWaveReconverge(BB); | ||
else | ||
Changed |= openIf(Term); | ||
} | ||
|
||
if (!Stack.empty()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6299,7 +6299,7 @@ unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const { | |
return AMDGPUISD::ELSE; | ||
case Intrinsic::amdgcn_loop: | ||
return AMDGPUISD::LOOP; | ||
case Intrinsic::amdgcn_end_cf: | ||
case Intrinsic::amdgcn_wave_reconverge: | ||
llvm_unreachable("should not occur"); | ||
default: | ||
return 0; | ||
|
@@ -9940,8 +9940,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, | |
|
||
return SDValue(Load, 0); | ||
} | ||
case Intrinsic::amdgcn_end_cf: | ||
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other, | ||
case Intrinsic::amdgcn_wave_reconverge: | ||
return SDValue(DAG.getMachineNode(AMDGPU::SI_WAVE_RECONVERGE, DL, MVT::Other, | ||
Op->getOperand(2), Chain), 0); | ||
case Intrinsic::amdgcn_s_barrier_init: | ||
case Intrinsic::amdgcn_s_barrier_join: | ||
|
@@ -15740,6 +15740,32 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const { | |
} | ||
} | ||
|
||
// ISel inserts copy to regs for the successor PHIs | ||
// at the BB end. We need to move the SI_WAVE_RECONVERGE right before the | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you avoid this by gluing the pseudo to the root node? Also, I think you can avoid a second walk over the function by doing this in EmitInstrWithCustomInserter |
||
// branch. | ||
for (auto &MBB : MF) { | ||
for (auto &MI : MBB) { | ||
if (MI.getOpcode() == AMDGPU::SI_WAVE_RECONVERGE) { | ||
MachineBasicBlock::iterator I(MI); | ||
MachineBasicBlock::iterator Next = std::next(I); | ||
bool NeedToMove = false; | ||
while (Next != MBB.end() && !Next->isBranch()) { | ||
NeedToMove = true; | ||
Next++; | ||
} | ||
|
||
assert((Next == MBB.end() || !Next->readsRegister(AMDGPU::SCC, TRI)) && | ||
"Malformed CFG detected!\n"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No newline in the string, this isn't real printing |
||
|
||
if (NeedToMove) { | ||
MBB.splice(Next, &MBB, &MI); | ||
} | ||
|
||
break; | ||
} | ||
} | ||
} | ||
|
||
// FIXME: This is a hack to fixup AGPR classes to use the properly aligned | ||
// classes if required. Ideally the register class constraints would differ | ||
// per-subtarget, but there's no easy way to achieve that right now. This is | ||
|
@@ -16336,7 +16362,7 @@ static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited, | |
default: | ||
Result = false; | ||
break; | ||
case Intrinsic::amdgcn_end_cf: | ||
case Intrinsic::amdgcn_wave_reconverge: | ||
case Intrinsic::amdgcn_loop: | ||
Result = true; | ||
break; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should document what this means
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I second that; all these control-flow pseudos need to have their semantics documented