Skip to content

MachineLICM: Allow hoisting REG_SEQUENCE #90638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions llvm/lib/CodeGen/MachineLICM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1264,25 +1264,32 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI,

// If we have a COPY with other uses in the loop, hoist to allow the users to
// also be hoisted.
Register DefReg;
if (MI.isCopy() && (DefReg = MI.getOperand(0).getReg()).isVirtual() &&
    MI.getOperand(1).getReg().isVirtual() &&
    IsLoopInvariantInst(MI, CurLoop) &&
    any_of(MRI->use_nodbg_instructions(MI.getOperand(0).getReg()),
           [&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) {
             // Users outside the loop don't motivate hoisting.
             if (!CurLoop->contains(&UseMI))
               return false;

             // COPY is a cheap instruction, but if moving it won't cause
             // high register pressure we're fine to hoist it even if the
             // user can't be hoisted later. Otherwise we want to check
             // whether the user is itself hoistable.
             if (CanCauseHighRegPressure(Cost, false) &&
                 !CurLoop->isLoopInvariant(UseMI, DefReg))
               return false;

             return true;
           }))
  return true;
// If we have a copy-like instruction (COPY or REG_SEQUENCE) with other uses
// in the loop, hoist to allow the users to also be hoisted.
// TODO: Handle all isCopyLike?
if (MI.isCopy() || MI.isRegSequence()) {
  Register DefReg = MI.getOperand(0).getReg();
  // Only handle the fully-virtual case: the def and every register use
  // operand must be virtual (physreg operands are left alone — see the
  // licm_reg_sequence_physreg_use test).
  if (DefReg.isVirtual() &&
      all_of(MI.uses(),
             [](const MachineOperand &UseOp) {
               return !UseOp.isReg() || UseOp.getReg().isVirtual();
             }) &&
      IsLoopInvariantInst(MI, CurLoop) &&
      any_of(MRI->use_nodbg_instructions(DefReg),
             [&CurLoop, this, DefReg, Cost](MachineInstr &UseMI) {
               // Users outside the loop don't motivate hoisting.
               if (!CurLoop->contains(&UseMI))
                 return false;

               // COPY is a cheap instruction, but if moving it won't cause
               // high register pressure we're fine to hoist it even if the
               // user can't be hoisted later. Otherwise we want to check
               // whether the user is itself hoistable.
               if (CanCauseHighRegPressure(Cost, false) &&
                   !CurLoop->isLoopInvariant(UseMI, DefReg))
                 return false;

               return true;
             }))
    return true;
}

// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8907,17 +8907,17 @@ define amdgpu_kernel void @atomic_min_i64(ptr addrspace(1) %out, i64 %in) {
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx2 s[10:11], s[0:1], 0x0
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: v_mov_b32_e32 v4, s3
; SI-NEXT: v_mov_b32_e32 v5, s2
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v2, s10
; SI-NEXT: v_mov_b32_e32 v3, s11
; SI-NEXT: v_mov_b32_e32 v2, s4
; SI-NEXT: v_mov_b32_e32 v3, s5
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s4, s0
; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: .LBB127_1: ; %atomicrmw.start
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: v_cmp_ge_i64_e32 vcc, s[2:3], v[2:3]
Expand Down
134 changes: 134 additions & 0 deletions llvm/test/CodeGen/AMDGPU/machinelicm-copy-like-instrs.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=amdgcn -run-pass=early-machinelicm -simplify-mir -o - %s | FileCheck %s

# Test that machine LICM hoists loop-invariant copy-like instructions
# (REG_SEQUENCE, INSERT_SUBREG) out of the loop when all operands are virtual.

---
name: licm_reg_sequence
body: |
; CHECK-LABEL: name: licm_reg_sequence
; CHECK: bb.0:
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]]
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $vgpr1
successors: %bb.1

%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1

bb.1:
successors: %bb.1, %bb.2

%3:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1
S_NOP 0, implicit %3
S_CBRANCH_SCC1 %bb.1, implicit undef $scc
S_BRANCH %bb.2

bb.2:
$vgpr0 = COPY %3
S_ENDPGM 0

...

# Don't bother handling reg_sequence with physreg uses (is there any
# reason for these to be legal?).
---
name: licm_reg_sequence_physreg_use
body: |
; CHECK-LABEL: name: licm_reg_sequence_physreg_use
; CHECK: bb.0:
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, $vgpr1, %subreg.sub1
; CHECK-NEXT: S_NOP 0, implicit [[REG_SEQUENCE]]
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: $vgpr0 = COPY [[REG_SEQUENCE]]
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $vgpr1
successors: %bb.1

%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1

bb.1:
successors: %bb.1, %bb.2
liveins: $vgpr0

%3:vreg_64 = REG_SEQUENCE %0, %subreg.sub0, $vgpr1, %subreg.sub1
S_NOP 0, implicit %3
S_CBRANCH_SCC1 %bb.1, implicit undef $scc
S_BRANCH %bb.2

bb.2:
$vgpr0 = COPY %3
S_ENDPGM 0

...

---
name: licm_insert_subreg
body: |
; CHECK-LABEL: name: licm_insert_subreg
; CHECK: bb.0:
; CHECK-NEXT: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[DEF]], [[COPY]], %subreg.sub0
; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vreg_64 = INSERT_SUBREG [[INSERT_SUBREG]], [[COPY1]], %subreg.sub1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: S_NOP 0, implicit [[INSERT_SUBREG1]]
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; CHECK-NEXT: S_BRANCH %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[INSERT_SUBREG1]]
; CHECK-NEXT: S_ENDPGM 0
bb.0:
liveins: $vgpr0, $vgpr1
successors: %bb.1

%0:vgpr_32 = COPY $vgpr0
%1:vgpr_32 = COPY $vgpr1

bb.1:
successors: %bb.1, %bb.2

%3:vreg_64 = IMPLICIT_DEF
%4:vreg_64 = INSERT_SUBREG %3, %0, %subreg.sub0
%5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1
S_NOP 0, implicit %5
S_CBRANCH_SCC1 %bb.1, implicit undef $scc
S_BRANCH %bb.2

bb.2:
$vgpr0_vgpr1 = COPY %5
S_ENDPGM 0

...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s10, -1
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_mov_b32 s11, s7
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: s_mov_b32 s8, s4
; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_branch .LBB0_2
; GCN-NEXT: .LBB0_1: ; %loop.exit.guard
Expand All @@ -20,7 +21,6 @@ define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
; GCN-NEXT: .LBB0_2: ; %bb1
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB0_4 Depth 2
; GCN-NEXT: s_mov_b32 s11, s7
; GCN-NEXT: buffer_load_dword v1, off, s[8:11], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, v1
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/Hexagon/expand-vstorerw-undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ b18: ; preds = %b16, %b7
br label %b22

b21: ; preds = %b22
store volatile <64 x i32> %v20, ptr null
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated change?

tail call void @sammy() #3
br label %b7

Expand Down