-
Notifications
You must be signed in to change notification settings - Fork 13.4k
AMDGPU: Update live intervals in convertToThreeAddress #104610
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Update live intervals in convertToThreeAddress #104610
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesFixes #98741 Full diff: https://github.com/llvm/llvm-project/pull/104610.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 9147242046ceda..0e3a12d49137cc 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3947,14 +3947,32 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() ||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
MachineInstr *DefMI;
- const auto killDef = [&]() -> void {
+ const auto killDef = [&](SlotIndex NewIdx) -> void {
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// The only user is the instruction which will be killed.
Register DefReg = DefMI->getOperand(0).getReg();
+
+ if (LIS) {
+ LiveInterval &DefLI = LIS->getInterval(DefReg);
+ LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(NewIdx);
+
+ if (OldSeg->end == NewIdx.getRegSlot()) {
+ DefLI.removeSegment(OldSeg->start, NewIdx.getRegSlot(), true);
+
+ for (auto &SR : DefLI.subranges()) {
+ LiveRange::Segment *OldSegSR = SR.getSegmentContaining(NewIdx);
+ SR.removeSegment(OldSegSR->start, NewIdx.getRegSlot(), true);
+ }
+
+ DefLI.removeEmptySubRanges();
+ }
+ }
+
if (!MRI.hasOneNonDBGUse(DefReg))
return;
// We cannot just remove the DefMI here, calling pass will crash.
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
+ DefMI->getOperand(0).setIsDead(true);
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
DefMI->removeOperand(I);
if (LV)
@@ -3976,9 +3994,10 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.addImm(Imm)
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
+ SlotIndex NewIdx;
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef(NewIdx);
return MIB;
}
}
@@ -3996,9 +4015,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Src2)
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
+
+ SlotIndex NewIdx;
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- killDef();
+ NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ killDef(NewIdx);
return MIB;
}
}
@@ -4018,10 +4039,12 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
.add(*Src2)
.setMIFlags(MI.getFlags());
updateLiveVariables(LV, MI, *MIB);
+
+ SlotIndex NewIdx;
if (LIS)
- LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+ NewIdx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
if (DefMI)
- killDef();
+ killDef(NewIdx);
return MIB;
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
index 0f20b8a2f1e29f..bde2ae13c4beab 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir
@@ -1,29 +1,120 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s --passes=two-address-instruction -o - | FileCheck --check-prefixes=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass liveintervals -run-pass twoaddressinstruction -verify-machineinstrs %s -o -
# GFX10-LABEL: name: test_fmamk_reg_imm_f16
-# GFX10: %2:vgpr_32 = IMPLICIT_DEF
+# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
name: test_fmamk_reg_imm_f16
-registers:
- - { id: 0, class: vreg_64 }
- - { id: 1, class: vgpr_32 }
- - { id: 2, class: vgpr_32 }
- - { id: 3, class: vgpr_32 }
body: |
bb.0:
- %0 = IMPLICIT_DEF
- %1 = COPY %0.sub1
- %2 = V_MOV_B32_e32 1078523331, implicit $exec
- %3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f16__imm_is_subreg
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ undef %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
+# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
+---
+name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec
+ %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f16__use_imm_before_mac
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ S_NOP 0, implicit %2
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_after_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+---
+name: test_fmamk_reg_imm_f16__use_imm_after_mac
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+ S_NOP 0, implicit %2
+
+...
+
+# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
+# GFX10: %0:vreg_64 = IMPLICIT_DEF
+# GFX10: %1:vgpr_32 = COPY %0.sub1
+# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
+# GFX10: S_NOP 0, implicit %2
+
+---
+name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
+body: |
+ bb.0:
+
+ %0:vreg_64 = IMPLICIT_DEF
+ %1:vgpr_32 = COPY %0.sub1
+ %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
+ S_NOP 0, implicit %2
+ %3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
+ S_NOP 0, implicit %2
...
# GFX10-LABEL: name: test_fmamk_imm_reg_f16
-# GFX10: %2:vgpr_32 = IMPLICIT_DEF
+# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
# GFX10-NOT: V_MOV_B32
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
---
|
db8d4ae
to
807378a
Compare
if (!MRI.hasOneNonDBGUse(DefReg)) | ||
return; | ||
// We cannot just remove the DefMI here, calling pass will crash. | ||
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF)); | ||
DefMI->getOperand(0).setIsDead(true); | ||
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I) | ||
DefMI->removeOperand(I); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible that we actually need to update intervals for any of these operands being removed as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The operand this should be removing is exec, so no. In principle there could be other implicit use operands, but they add a lot of complexity for no reason so I would hope movs with implicit operands would have been rejected as fold candidates
807378a
to
5b1d897
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with the one comment about variable naming
@@ -3947,14 +3947,34 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, | |||
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || | |||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { | |||
MachineInstr *DefMI; | |||
const auto killDef = [&]() -> void { | |||
const auto killDef = [&](SlotIndex OldDefIdx) -> void { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This variable should be called OldUseIdx
.
5b1d897
to
5ae0d57
Compare
@@ -3934,18 +3934,38 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, | |||
(ST.getConstantBusLimit(Opc) > 1 || !Src0->isReg() || | |||
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) { | |||
MachineInstr *DefMI; | |||
const auto killDef = [&]() -> void { | |||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |||
const auto killDef = [&](SlotIndex OldUseIdx) -> void { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OldUseIdx
is no longer required in this version.
if (LIS) { | ||
LiveInterval &DefLI = LIS->getInterval(DefReg); | ||
|
||
// We cannot delete the original instruction here, so hack out the use |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you add a test that demonstrates this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is already covered by several tests (the problematic cases were any of the ones with other uses)
5ae0d57
to
7a56477
Compare
Fixes #98741