Skip to content

Commit 2e1718a

Browse files
committed
Reland "AMDGPU: Duplicate instead of COPY constants from VGPR to SGPR (#66882)"
Teach the si-fix-sgpr-copies pass to deal with REG_SEQUENCE, PHI or INSERT_SUBREG where the result is an SGPR, but some of the inputs are constants materialized into VGPRs. This may happen in cases where for instance several instructions use an immediate zero and SelectionDAG chooses to put it in a VGPR to satisfy all of them. This however causes the si-fix-sgpr-copies to try to switch the whole chain to VGPR and may lead to illegal VGPR-to-SGPR copies. Rematerializing the constant into an SGPR fixes the issue. This was originally reverted because it triggered an unrelated bug in PEI on one of the OpenMP buildbots. That bug has been fixed in #68299, so it should be ok to try again.
1 parent fad0919 commit 2e1718a

File tree

2 files changed

+103
-26
lines changed

2 files changed

+103
-26
lines changed

llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp

+48-26
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,13 @@ class SIFixSGPRCopies : public MachineFunctionPass {
152152

153153
void processPHINode(MachineInstr &MI);
154154

155+
// Check if MO is an immediate materialized into a VGPR, and if so replace it
156+
// with an SGPR immediate. The VGPR immediate is also deleted if it does not
157+
// have any other uses.
158+
bool tryMoveVGPRConstToSGPR(MachineOperand &MO, Register NewDst,
159+
MachineBasicBlock *BlockToInsertTo,
160+
MachineBasicBlock::iterator PointToInsertTo);
161+
155162
StringRef getPassName() const override { return "SI Fix SGPR copies"; }
156163

157164
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -662,13 +669,17 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
662669
: MBB;
663670
MachineBasicBlock::iterator PointToInsertCopy =
664671
MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
665-
MachineInstr *NewCopy =
666-
BuildMI(*BlockToInsertCopy, PointToInsertCopy,
667-
PointToInsertCopy->getDebugLoc(),
668-
TII->get(AMDGPU::COPY), NewDst)
669-
.addReg(MO.getReg());
670-
MO.setReg(NewDst);
671-
analyzeVGPRToSGPRCopy(NewCopy);
672+
673+
if (!tryMoveVGPRConstToSGPR(MO, NewDst, BlockToInsertCopy,
674+
PointToInsertCopy)) {
675+
MachineInstr *NewCopy =
676+
BuildMI(*BlockToInsertCopy, PointToInsertCopy,
677+
PointToInsertCopy->getDebugLoc(),
678+
TII->get(AMDGPU::COPY), NewDst)
679+
.addReg(MO.getReg());
680+
MO.setReg(NewDst);
681+
analyzeVGPRToSGPRCopy(NewCopy);
682+
}
672683
}
673684
}
674685
}
@@ -829,6 +840,32 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
829840
}
830841
}
831842

843+
bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR(
844+
MachineOperand &MaybeVGPRConstMO, Register DstReg,
845+
MachineBasicBlock *BlockToInsertTo,
846+
MachineBasicBlock::iterator PointToInsertTo) {
847+
848+
MachineInstr *DefMI = MRI->getVRegDef(MaybeVGPRConstMO.getReg());
849+
if (!DefMI || !DefMI->isMoveImmediate())
850+
return false;
851+
852+
MachineOperand *SrcConst = TII->getNamedOperand(*DefMI, AMDGPU::OpName::src0);
853+
if (SrcConst->isReg())
854+
return false;
855+
856+
const TargetRegisterClass *SrcRC =
857+
MRI->getRegClass(MaybeVGPRConstMO.getReg());
858+
unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
859+
unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
860+
BuildMI(*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc(),
861+
TII->get(MoveOp), DstReg)
862+
.add(*SrcConst);
863+
if (MRI->hasOneUse(MaybeVGPRConstMO.getReg()))
864+
DefMI->eraseFromParent();
865+
MaybeVGPRConstMO.setReg(DstReg);
866+
return true;
867+
}
868+
832869
bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
833870
MachineBasicBlock::iterator &I) {
834871
Register DstReg = MI.getOperand(0).getReg();
@@ -846,25 +883,10 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
846883
TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
847884
.add(MI.getOperand(1));
848885
MI.getOperand(1).setReg(TmpReg);
849-
} else {
850-
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
851-
if (DefMI && DefMI->isMoveImmediate()) {
852-
MachineOperand SrcConst = DefMI->getOperand(AMDGPU::getNamedOperandIdx(
853-
DefMI->getOpcode(), AMDGPU::OpName::src0));
854-
if (!SrcConst.isReg()) {
855-
const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
856-
unsigned MoveSize = TRI->getRegSizeInBits(*SrcRC);
857-
unsigned MoveOp =
858-
MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
859-
BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(MoveOp),
860-
DstReg)
861-
.add(SrcConst);
862-
I = std::next(I);
863-
if (MRI->hasOneUse(SrcReg))
864-
DefMI->eraseFromParent();
865-
MI.eraseFromParent();
866-
}
867-
}
886+
} else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
887+
MI)) {
888+
I = std::next(I);
889+
MI.eraseFromParent();
868890
}
869891
return true;
870892
}

llvm/test/CodeGen/AMDGPU/fix-sgpr-copies.mir

+55
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,62 @@ body: |
125125
%2:sreg_32 = S_MOV_B32 1
126126
%3:sreg_32 = COPY %1:vgpr_32
127127
%4:sreg_32 = S_CSELECT_B32 killed %2:sreg_32, killed %3:sreg_32, implicit undef $scc
128+
...
129+
130+
---
131+
# Test that the VGPR immediate is replaced with an SGPR one.
132+
# GCN-LABEL: name: reg_sequence_vgpr_immediate
133+
# GCN: [[A_SGPR:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
134+
# GCN-NEXT: [[VGPR_CONST:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 37
135+
# GCN-NEXT: [[SGPR_CONST:%[0-9]+]]:sgpr_32 = S_MOV_B32 37
136+
# GCN-NEXT: {{%[0-9]+}}:sreg_64 = REG_SEQUENCE [[SGPR_CONST]], %subreg.sub0, [[A_SGPR]], %subreg.sub1
137+
name: reg_sequence_vgpr_immediate
138+
body: |
139+
bb.0:
140+
%0:sreg_32 = IMPLICIT_DEF
141+
%1:vgpr_32 = V_MOV_B32_e32 37, implicit $exec
142+
%2:sreg_64 = REG_SEQUENCE %1:vgpr_32, %subreg.sub0, %0:sreg_32, %subreg.sub1
143+
144+
%3:vgpr_32 = V_ADD_U32_e32 %1:vgpr_32, %1:vgpr_32, implicit $exec
145+
...
146+
147+
---
148+
# GCN-LABEL: name: insert_subreg_vgpr_immediate
149+
# GCN: [[DST:%[0-9]+]]:sgpr_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr0, %subreg.sub2
150+
# GCN-NEXT: [[SGPR_CONST:%[0-9]+]]:sgpr_32 = S_MOV_B32 43
151+
# GCN-NEXT: {{%[0-9]+}}:sgpr_128 = INSERT_SUBREG [[DST]], [[SGPR_CONST]], %subreg.sub3
152+
name: insert_subreg_vgpr_immediate
153+
body: |
154+
bb.0:
155+
%0:sgpr_128 = REG_SEQUENCE $sgpr0, %subreg.sub0, $sgpr0, %subreg.sub2
156+
%1:vgpr_32 = V_MOV_B32_e32 43, implicit $exec
157+
%2:sgpr_128 = INSERT_SUBREG %0, %1, %subreg.sub3
158+
...
159+
128160
---
161+
# GCN-LABEL: name: phi_vgpr_immediate
162+
# GCN: bb.1:
163+
# GCN: [[SGPR:%[0-9]+]]:sgpr_32 = S_MOV_B32 51
164+
# GCN: bb.2:
165+
# GCN: IMPLICIT_DEF
166+
# GCN: bb.3:
167+
# GCN: sreg_32 = PHI [[SGPR]], %bb.1
168+
name: phi_vgpr_immediate
169+
tracksRegLiveness: true
170+
body: |
171+
bb.0:
172+
S_CBRANCH_SCC1 %bb.2, implicit undef $scc
173+
174+
bb.1:
175+
%0:vgpr_32 = V_MOV_B32_e32 51, implicit $exec
176+
S_BRANCH %bb.3
177+
178+
bb.2:
179+
%1:sreg_32 = IMPLICIT_DEF
180+
S_BRANCH %bb.3
181+
182+
bb.3:
183+
%2:sreg_32 = PHI %0:vgpr_32, %bb.1, %1:sreg_32, %bb.2
129184
130185
---
131186
name: cmp_f32

0 commit comments

Comments
 (0)