@@ -22048,15 +22048,25 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
22048
22048
// Extend everything to 80 bits to force it to be done on x87.
22049
22049
// TODO: Are there any fast-math-flags to propagate here?
22050
22050
if (IsStrict) {
22051
- SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
22052
- {Chain, Fild, Fudge});
22051
+ unsigned Opc = ISD::STRICT_FADD;
22052
+ // Windows needs the precision control changed to 80 bits around this add.
22053
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
22054
+ Opc = X86ISD::STRICT_FP80_ADD;
22055
+
22056
+ SDValue Add =
22057
+ DAG.getNode(Opc, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge});
22053
22058
// STRICT_FP_ROUND can't handle equal types.
22054
22059
if (DstVT == MVT::f80)
22055
22060
return Add;
22056
22061
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
22057
22062
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
22058
22063
}
22059
- SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
22064
+ unsigned Opc = ISD::FADD;
22065
+ // Windows needs the precision control changed to 80 bits around this add.
22066
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
22067
+ Opc = X86ISD::FP80_ADD;
22068
+
22069
+ SDValue Add = DAG.getNode(Opc, dl, MVT::f80, Fild, Fudge);
22060
22070
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
22061
22071
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
22062
22072
}
@@ -34881,6 +34891,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
34881
34891
NODE_NAME_CASE(AESDECWIDE256KL)
34882
34892
NODE_NAME_CASE(CMPCCXADD)
34883
34893
NODE_NAME_CASE(TESTUI)
34894
+ NODE_NAME_CASE(FP80_ADD)
34895
+ NODE_NAME_CASE(STRICT_FP80_ADD)
34884
34896
}
34885
34897
return nullptr;
34886
34898
#undef NODE_NAME_CASE
@@ -37356,6 +37368,69 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
37356
37368
case X86::CMOV_VK64:
37357
37369
return EmitLoweredSelect(MI, BB);
37358
37370
37371
+ case X86::FP80_ADDr:
37372
+ case X86::FP80_ADDm32: {
37373
+ // Change the floating point control register to use double extended
37374
+ // precision when performing the addition.
37375
+ int OrigCWFrameIdx =
37376
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
37377
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)),
37378
+ OrigCWFrameIdx);
37379
+
37380
+ // Load the old value of the control word...
37381
+ Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
37382
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
37383
+ OrigCWFrameIdx);
37384
+
37385
+ // OR 0b11 into bits 8 and 9. 0b11 is the encoding for double extended
37386
+ // precision.
37387
+ Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
37388
+ BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
37389
+ .addReg(OldCW, RegState::Kill)
37390
+ .addImm(0x300);
37391
+
37392
+ // Extract to 16 bits.
37393
+ Register NewCW16 =
37394
+ MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
37395
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
37396
+ .addReg(NewCW, RegState::Kill, X86::sub_16bit);
37397
+
37398
+ // Prepare memory for FLDCW.
37399
+ int NewCWFrameIdx =
37400
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
37401
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
37402
+ NewCWFrameIdx)
37403
+ .addReg(NewCW16, RegState::Kill);
37404
+
37405
+ // Reload the modified control word now...
37406
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
37407
+ NewCWFrameIdx);
37408
+
37409
+ // Do the addition.
37410
+ if (MI.getOpcode() == X86::FP80_ADDr) {
37411
+ BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80))
37412
+ .add(MI.getOperand(0))
37413
+ .add(MI.getOperand(1))
37414
+ .add(MI.getOperand(2));
37415
+ } else {
37416
+ BuildMI(*BB, MI, DL, TII->get(X86::ADD_Fp80m32))
37417
+ .add(MI.getOperand(0))
37418
+ .add(MI.getOperand(1))
37419
+ .add(MI.getOperand(2))
37420
+ .add(MI.getOperand(3))
37421
+ .add(MI.getOperand(4))
37422
+ .add(MI.getOperand(5))
37423
+ .add(MI.getOperand(6));
37424
+ }
37425
+
37426
+ // Reload the original control word now.
37427
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FLDCW16m)),
37428
+ OrigCWFrameIdx);
37429
+
37430
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
37431
+ return BB;
37432
+ }
37433
+
37359
37434
case X86::FP32_TO_INT16_IN_MEM:
37360
37435
case X86::FP32_TO_INT32_IN_MEM:
37361
37436
case X86::FP32_TO_INT64_IN_MEM:
0 commit comments