Skip to content

Commit e9e3868

Browse files
authored
[AMDGPU] Correctly restore FP mode in FDIV32 lowering (#66346)
Addresses the FIXME for both DAGISel and GISel.
1 parent 62dbcc4 commit e9e3868

File tree

4 files changed

+1192
-421
lines changed

4 files changed

+1192
-421
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 27 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4657,6 +4657,10 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
46574657
return true;
46584658
}
46594659

4660+
// Select the FP32 denormal bit field in the MODE hardware register. The
// encoding ORs the register ID with a field offset of 4 and a WIDTH_M1 value of
// 1 (presumably "width minus one", i.e. a 2-bit field -- confirm against the
// Hwreg definitions). Kept at file scope so the S_GETREG_B32 / S_SETREG_B32 /
// S_SETREG_IMM32_B32 sequences that save, toggle and restore the mode all
// address the same field.
static const unsigned SPDenormModeBitField =
4661+
AMDGPU::Hwreg::ID_MODE | (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
4662+
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
4663+
46604664
// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
46614665
// to enable denorm mode. When 'Enable' is false, disable denorm mode.
46624666
static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
@@ -4675,11 +4679,6 @@ static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
46754679
.addImm(NewDenormModeValue);
46764680

46774681
} else {
4678-
// Select FP32 bit field in mode register.
4679-
unsigned SPDenormModeBitField = AMDGPU::Hwreg::ID_MODE |
4680-
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
4681-
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
4682-
46834682
B.buildInstr(AMDGPU::S_SETREG_IMM32_B32)
46844683
.addImm(SPDenormMode)
46854684
.addImm(SPDenormModeBitField);
@@ -4723,10 +4722,21 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
47234722
.setMIFlags(Flags);
47244723
auto NegDivScale0 = B.buildFNeg(S32, DenominatorScaled, Flags);
47254724

4726-
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
4727-
// aren't modeled as reading it.
4728-
if (Mode.FP32Denormals == DenormalMode::getPreserveSign())
4725+
const bool PreservesDenormals = Mode.FP32Denormals == DenormalMode::getIEEE();
4726+
const bool HasDynamicDenormals =
4727+
(Mode.FP32Denormals.Input == DenormalMode::Dynamic) ||
4728+
(Mode.FP32Denormals.Output == DenormalMode::Dynamic);
4729+
4730+
Register SavedSPDenormMode;
4731+
if (!PreservesDenormals) {
4732+
if (HasDynamicDenormals) {
4733+
SavedSPDenormMode = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
4734+
B.buildInstr(AMDGPU::S_GETREG_B32)
4735+
.addDef(SavedSPDenormMode)
4736+
.addImm(SPDenormModeBitField);
4737+
}
47294738
toggleSPDenormMode(true, B, ST, Mode);
4739+
}
47304740

47314741
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
47324742
auto Fma1 = B.buildFMA(S32, Fma0, ApproxRcp, ApproxRcp, Flags);
@@ -4735,10 +4745,15 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
47354745
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
47364746
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
47374747

4738-
// FIXME: This mishandles dynamic denormal mode. We need to query the
4739-
// current mode and restore the original.
4740-
if (Mode.FP32Denormals == DenormalMode::getPreserveSign())
4741-
toggleSPDenormMode(false, B, ST, Mode);
4748+
if (!PreservesDenormals) {
4749+
if (HasDynamicDenormals) {
4750+
assert(SavedSPDenormMode);
4751+
B.buildInstr(AMDGPU::S_SETREG_B32)
4752+
.addReg(SavedSPDenormMode)
4753+
.addImm(SPDenormModeBitField);
4754+
} else
4755+
toggleSPDenormMode(false, B, ST, Mode);
4756+
}
47424757

47434758
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32})
47444759
.addUse(Fma4.getReg(0))

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 29 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -9589,28 +9589,44 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
95899589
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
95909590
const DenormalMode DenormMode = Info->getMode().FP32Denormals;
95919591

9592-
const bool HasFP32Denormals = DenormMode != DenormalMode::getPreserveSign();
9592+
const bool PreservesDenormals = DenormMode == DenormalMode::getIEEE();
9593+
const bool HasDynamicDenormals =
9594+
(DenormMode.Input == DenormalMode::Dynamic) ||
9595+
(DenormMode.Output == DenormalMode::Dynamic);
95939596

9594-
if (!HasFP32Denormals) {
9597+
SDValue SavedDenormMode;
9598+
9599+
if (!PreservesDenormals) {
95959600
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
95969601
// lowering. The chain dependence is insufficient, and we need glue. We do
95979602
// not need the glue variants in a strictfp function.
95989603

95999604
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
96009605

9606+
SDValue Glue = DAG.getEntryNode();
9607+
if (HasDynamicDenormals) {
9608+
SDNode *GetReg = DAG.getMachineNode(AMDGPU::S_GETREG_B32, SL,
9609+
DAG.getVTList(MVT::i32, MVT::Glue),
9610+
{BitField, Glue});
9611+
SavedDenormMode = SDValue(GetReg, 0);
9612+
9613+
Glue = DAG.getMergeValues(
9614+
{DAG.getEntryNode(), SDValue(GetReg, 0), SDValue(GetReg, 1)}, SL);
9615+
}
9616+
96019617
SDNode *EnableDenorm;
96029618
if (Subtarget->hasDenormModeInst()) {
96039619
const SDValue EnableDenormValue =
96049620
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget);
96059621

9606-
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
9607-
DAG.getEntryNode(), EnableDenormValue).getNode();
9622+
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, Glue,
9623+
EnableDenormValue)
9624+
.getNode();
96089625
} else {
96099626
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
96109627
SL, MVT::i32);
9611-
EnableDenorm =
9612-
DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
9613-
{EnableDenormValue, BitField, DAG.getEntryNode()});
9628+
EnableDenorm = DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
9629+
{EnableDenormValue, BitField, Glue});
96149630
}
96159631

96169632
SDValue Ops[3] = {
@@ -9640,21 +9656,21 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
96409656
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
96419657
NumeratorScaled, Fma3, Flags);
96429658

9643-
if (!HasFP32Denormals) {
9644-
// FIXME: This mishandles dynamic denormal mode. We need to query the
9645-
// current mode and restore the original.
9646-
9659+
if (!PreservesDenormals) {
96479660
SDNode *DisableDenorm;
9648-
if (Subtarget->hasDenormModeInst()) {
9661+
if (!HasDynamicDenormals && Subtarget->hasDenormModeInst()) {
96499662
const SDValue DisableDenormValue = getSPDenormModeValue(
96509663
FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget);
96519664

96529665
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
96539666
Fma4.getValue(1), DisableDenormValue,
96549667
Fma4.getValue(2)).getNode();
96559668
} else {
9669+
assert(HasDynamicDenormals == (bool)SavedDenormMode);
96569670
const SDValue DisableDenormValue =
9657-
DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
9671+
HasDynamicDenormals
9672+
? SavedDenormMode
9673+
: DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
96589674

96599675
DisableDenorm = DAG.getMachineNode(
96609676
AMDGPU::S_SETREG_B32, SL, MVT::Other,

0 commit comments

Comments
 (0)