@@ -9589,28 +9589,44 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
9589
9589
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
9590
9590
const DenormalMode DenormMode = Info->getMode().FP32Denormals;
9591
9591
9592
- const bool HasFP32Denormals = DenormMode != DenormalMode::getPreserveSign();
9592
+ const bool PreservesDenormals = DenormMode == DenormalMode::getIEEE();
9593
+ const bool HasDynamicDenormals =
9594
+ (DenormMode.Input == DenormalMode::Dynamic) ||
9595
+ (DenormMode.Output == DenormalMode::Dynamic);
9593
9596
9594
- if (!HasFP32Denormals) {
9597
+ SDValue SavedDenormMode;
9598
+
9599
+ if (!PreservesDenormals) {
9595
9600
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
9596
9601
// lowering. The chain dependence is insufficient, and we need glue. We do
9597
9602
// not need the glue variants in a strictfp function.
9598
9603
9599
9604
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
9600
9605
9606
+ SDValue Glue = DAG.getEntryNode();
9607
+ if (HasDynamicDenormals) {
9608
+ SDNode *GetReg = DAG.getMachineNode(AMDGPU::S_GETREG_B32, SL,
9609
+ DAG.getVTList(MVT::i32, MVT::Glue),
9610
+ {BitField, Glue});
9611
+ SavedDenormMode = SDValue(GetReg, 0);
9612
+
9613
+ Glue = DAG.getMergeValues(
9614
+ {DAG.getEntryNode(), SDValue(GetReg, 0), SDValue(GetReg, 1)}, SL);
9615
+ }
9616
+
9601
9617
SDNode *EnableDenorm;
9602
9618
if (Subtarget->hasDenormModeInst()) {
9603
9619
const SDValue EnableDenormValue =
9604
9620
getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget);
9605
9621
9606
- EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
9607
- DAG.getEntryNode(), EnableDenormValue).getNode();
9622
+ EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs, Glue,
9623
+ EnableDenormValue)
9624
+ .getNode();
9608
9625
} else {
9609
9626
const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
9610
9627
SL, MVT::i32);
9611
- EnableDenorm =
9612
- DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
9613
- {EnableDenormValue, BitField, DAG.getEntryNode()});
9628
+ EnableDenorm = DAG.getMachineNode(AMDGPU::S_SETREG_B32, SL, BindParamVTs,
9629
+ {EnableDenormValue, BitField, Glue});
9614
9630
}
9615
9631
9616
9632
SDValue Ops[3] = {
@@ -9640,21 +9656,21 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
9640
9656
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
9641
9657
NumeratorScaled, Fma3, Flags);
9642
9658
9643
- if (!HasFP32Denormals) {
9644
- // FIXME: This mishandles dynamic denormal mode. We need to query the
9645
- // current mode and restore the original.
9646
-
9659
+ if (!PreservesDenormals) {
9647
9660
SDNode *DisableDenorm;
9648
- if (Subtarget->hasDenormModeInst()) {
9661
+ if (!HasDynamicDenormals && Subtarget->hasDenormModeInst()) {
9649
9662
const SDValue DisableDenormValue = getSPDenormModeValue(
9650
9663
FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget);
9651
9664
9652
9665
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
9653
9666
Fma4.getValue(1), DisableDenormValue,
9654
9667
Fma4.getValue(2)).getNode();
9655
9668
} else {
9669
+ assert(HasDynamicDenormals == (bool)SavedDenormMode);
9656
9670
const SDValue DisableDenormValue =
9657
- DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
9671
+ HasDynamicDenormals
9672
+ ? SavedDenormMode
9673
+ : DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
9658
9674
9659
9675
DisableDenorm = DAG.getMachineNode(
9660
9676
AMDGPU::S_SETREG_B32, SL, MVT::Other,
0 commit comments