Skip to content

[MachineSink] Fix missing sinks along critical edges #97618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 66 additions & 15 deletions llvm/lib/CodeGen/MachineSink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,14 @@ namespace {
// Remember which edges have been considered for breaking.
SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
CEBCandidates;
// Memorize the register that also wanted to sink into the same block along
// a different critical edge.
// {register to sink, sink-to block} -> the first sink-from block.
// We're recording the first sink-from block because that (critical) edge
// was deferred until we see another register that's going to sink into the
// same block.
DenseMap<std::pair<Register, MachineBasicBlock *>, MachineBasicBlock *>
CEMergeCandidates;
// Remember which edges we are about to split.
// This is different from CEBCandidates since those edges
// will be split.
Expand Down Expand Up @@ -197,14 +205,17 @@ namespace {

void releaseMemory() override {
CEBCandidates.clear();
CEMergeCandidates.clear();
}

private:
bool ProcessBlock(MachineBasicBlock &MBB);
void ProcessDbgInst(MachineInstr &MI);
bool isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To);
bool isLegalToBreakCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
MachineBasicBlock *To, bool BreakPHIEdge);
bool isWorthBreakingCriticalEdge(MachineInstr &MI, MachineBasicBlock *From,
MachineBasicBlock *To,
MachineBasicBlock *&DeferredFromBlock);

bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To,
MachineInstr &MI);
Expand Down Expand Up @@ -725,6 +736,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {

// Process all basic blocks.
CEBCandidates.clear();
CEMergeCandidates.clear();
ToSplit.clear();
for (auto &MBB: MF)
MadeChange |= ProcessBlock(MBB);
Expand Down Expand Up @@ -873,9 +885,9 @@ void MachineSinking::ProcessDbgInst(MachineInstr &MI) {
SeenDbgVars.insert(Var);
}

bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
MachineBasicBlock *From,
MachineBasicBlock *To) {
bool MachineSinking::isWorthBreakingCriticalEdge(
MachineInstr &MI, MachineBasicBlock *From, MachineBasicBlock *To,
MachineBasicBlock *&DeferredFromBlock) {
// FIXME: Need much better heuristics.

// If the pass has already considered breaking this edge (during this pass
Expand All @@ -887,6 +899,27 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;

// Check and record the register and the destination block we want to sink
// into. Note that we want to do the following before the next check on branch
// probability. Because we want to record the initial candidate even if it's
// on hot edge, so that other candidates that might not on hot edges can be
// sinked as well.
for (const auto &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (!Reg)
continue;
Register SrcReg = Reg.isVirtual() ? TRI->lookThruCopyLike(Reg, MRI) : Reg;
auto Key = std::make_pair(SrcReg, To);
auto Res = CEMergeCandidates.try_emplace(Key, From);
// We wanted to sink the same register into the same block, consider it to
// be profitable.
if (!Res.second) {
// Return the source block that was previously held off.
DeferredFromBlock = Res.first->second;
return true;
}
}

if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
BranchProbability(SplitEdgeProbabilityThreshold, 100))
return true;
Expand Down Expand Up @@ -921,13 +954,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
return false;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
MachineBasicBlock *FromBB,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
return false;

bool MachineSinking::isLegalToBreakCriticalEdge(MachineInstr &MI,
MachineBasicBlock *FromBB,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
// Avoid breaking back edge. From == To means backedge for single BB cycle.
if (!SplitEdges || FromBB == ToBB)
return false;
Expand Down Expand Up @@ -985,11 +1015,32 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
return false;
}

ToSplit.insert(std::make_pair(FromBB, ToBB));

return true;
}

bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI,
MachineBasicBlock *FromBB,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
bool Status = false;
MachineBasicBlock *DeferredFromBB = nullptr;
if (isWorthBreakingCriticalEdge(MI, FromBB, ToBB, DeferredFromBB)) {
// If there is a DeferredFromBB, we consider FromBB only if _both_
// of them are legal to split.
if ((!DeferredFromBB ||
ToSplit.count(std::make_pair(DeferredFromBB, ToBB)) ||
isLegalToBreakCriticalEdge(MI, DeferredFromBB, ToBB, BreakPHIEdge)) &&
isLegalToBreakCriticalEdge(MI, FromBB, ToBB, BreakPHIEdge)) {
ToSplit.insert(std::make_pair(FromBB, ToBB));
if (DeferredFromBB)
ToSplit.insert(std::make_pair(DeferredFromBB, ToBB));
Status = true;
}
}

return Status;
}

std::vector<unsigned> &
MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) {
// Currently to save compiling time, MBB's register pressure will not change
Expand Down
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/AArch64/and-sink.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,27 +46,24 @@ bb2:
define dso_local i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-LABEL: and_sink2:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: adrp x9, A
; CHECK-NEXT: str wzr, [x9, :lo12:A]
; CHECK-NEXT: adrp x8, A
; CHECK-NEXT: str wzr, [x8, :lo12:A]
; CHECK-NEXT: tbz w1, #0, .LBB1_5
; CHECK-NEXT: // %bb.1: // %bb0.preheader
; CHECK-NEXT: adrp x8, B
; CHECK-NEXT: adrp x9, C
; CHECK-NEXT: .LBB1_2: // %bb0
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: str wzr, [x8, :lo12:B]
; CHECK-NEXT: tbz w2, #0, .LBB1_6
; CHECK-NEXT: tbz w2, #0, .LBB1_5
; CHECK-NEXT: // %bb.3: // %bb1
; CHECK-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: str wzr, [x9, :lo12:C]
; CHECK-NEXT: tbnz w0, #2, .LBB1_2
; CHECK-NEXT: // %bb.4:
; CHECK-NEXT: mov w8, #1 // =0x1
; CHECK-NEXT: .LBB1_5: // %common.ret
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: mov w0, #1 // =0x1
; CHECK-NEXT: ret
; CHECK-NEXT: .LBB1_6:
; CHECK-NEXT: .LBB1_5:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret

Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
define i64 @test_or(i32 %a, i32 %b) {
; CHECK-LABEL: test_or:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB0_2
; CHECK-NEXT: LBB0_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB0_2
; CHECK-NEXT: LBB0_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB0_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand All @@ -37,13 +35,11 @@ bb4:
define i64 @test_or_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_or_select:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB1_2
; CHECK-NEXT: LBB1_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB1_2
; CHECK-NEXT: LBB1_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB1_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB1_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand All @@ -70,13 +66,11 @@ bb4:
define i64 @test_and(i32 %a, i32 %b) {
; CHECK-LABEL: test_and:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB2_2
; CHECK-NEXT: LBB2_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB2_2
; CHECK-NEXT: LBB2_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB2_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB2_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand All @@ -103,13 +97,11 @@ bb4:
define i64 @test_and_select(i32 %a, i32 %b) {
; CHECK-LABEL: test_and_select:
; CHECK: ; %bb.0: ; %bb1
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: cbnz w0, LBB3_2
; CHECK-NEXT: LBB3_1:
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbnz w8, LBB3_2
; CHECK-NEXT: LBB3_1: ; %common.ret
; CHECK-NEXT: ret
; CHECK-NEXT: LBB3_2: ; %bb1.cond.split
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: cbz w1, LBB3_1
; CHECK-NEXT: ; %bb.3: ; %bb4
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
Expand Down
34 changes: 16 additions & 18 deletions llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-NEXT: mv s0, a0
; CHECK-NEXT: call toupper
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz s0, .LBB0_25
; CHECK-NEXT: beqz s0, .LBB0_26
; CHECK-NEXT: # %bb.1: # %while.body.preheader
; CHECK-NEXT: li a2, 1
; CHECK-NEXT: li a3, 9
Expand Down Expand Up @@ -55,36 +55,34 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: beq a2, a3, .LBB0_2
; CHECK-NEXT: # %bb.14: # %while.body.6
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.15: # %strdup.exit.split.loop.exit126
; CHECK-NEXT: addi s0, s1, 7
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: .LBB0_16: # %while.body
; CHECK-NEXT: bnez a2, .LBB0_18
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_16: # %while.body
; CHECK-NEXT: beqz a2, .LBB0_26
; CHECK-NEXT: j .LBB0_18
; CHECK-NEXT: .LBB0_17: # %while.body.1
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: .LBB0_18: # %strdup.exit.loopexit
; CHECK-NEXT: li s0, 0
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_19: # %while.body.3
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.20: # %strdup.exit.split.loop.exit120
; CHECK-NEXT: addi s0, s1, 4
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_21: # %while.body.4
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: beqz a2, .LBB0_24
; CHECK-NEXT: # %bb.22: # %strdup.exit.split.loop.exit122
; CHECK-NEXT: addi s0, s1, 5
; CHECK-NEXT: j .LBB0_24
; CHECK-NEXT: j .LBB0_25
; CHECK-NEXT: .LBB0_23: # %while.body.5
; CHECK-NEXT: bnez a2, .LBB0_25
; CHECK-NEXT: .LBB0_24:
; CHECK-NEXT: li a1, 0
; CHECK-NEXT: beqz a2, .LBB0_25
; CHECK-NEXT: .LBB0_24: # %strdup.exit
; CHECK-NEXT: j .LBB0_26
; CHECK-NEXT: .LBB0_25: # %strdup.exit
; CHECK-NEXT: li s1, 0
; CHECK-NEXT: mv s2, a0
; CHECK-NEXT: li a0, 0
Expand All @@ -95,7 +93,7 @@ define i1 @sink_li(ptr %text, ptr %text.addr.0) nounwind {
; CHECK-NEXT: li a2, 0
; CHECK-NEXT: jalr s1
; CHECK-NEXT: li a1, 1
; CHECK-NEXT: .LBB0_25: # %return
; CHECK-NEXT: .LBB0_26: # %return
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
Expand Down
Loading