Skip to content

Commit 0d27b8b

Browse files
committed
[SDAG] Share signed zero handling for maximum and maximumnum
Use the same code to handle signed zero ordering for maximum and maximumnum legalization. For maximumnum, this reduces the number of comparisons and fixes legalization for the case where the same-sized integer type is not legal.
1 parent 54da543 commit 0d27b8b

File tree

7 files changed

+16998
-20523
lines changed

7 files changed

+16998
-20523
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8559,6 +8559,23 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
85598559
return SDValue();
85608560
}
85618561

8562+
/// Apply the signed-zero ordering required by fminimum/fmaximum-style
/// expansions (-0.0 orders before +0.0) to an already computed min/max.
///
/// \param DAG    DAG used to build the fix-up nodes.
/// \param IsMax  True for a maximum (prefer +0.0), false for a minimum
///               (prefer -0.0).
/// \param MinMax The min/max of \p LHS and \p RHS computed by the caller; its
///               type is the type of the returned value.
/// \param LHS    First operand of the min/max.
/// \param RHS    Second operand of the min/max.
/// \param CCVT   Result type of the emitted comparisons.
/// \param Flags  Flags attached to the emitted selects.
/// \param DL     Debug location for the emitted nodes.
/// \returns \p MinMax, except when both operands are zeros, in which case the
///          zero with the required sign is selected.
static SDValue emitSignedZeroOrdering(SelectionDAG &DAG, bool IsMax,
                                      SDValue MinMax, SDValue LHS, SDValue RHS,
                                      EVT CCVT, SDNodeFlags Flags,
                                      const SDLoc &DL) {
  EVT VT = MinMax.getValueType();
  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);

  // The fix-up is only valid when *both* operands are zeros. Checking just
  // "MinMax == 0.0" is not sufficient: with exactly one zero operand the
  // select below could return the non-zero operand, e.g. max(-0.0, -1.0)
  // would yield -1.0 and min(+0.0, 1.0) would yield 1.0. In those cases
  // MinMax is already the correct result, so leave it alone.
  SDValue LHSIsZero = DAG.getSetCC(DL, CCVT, LHS, FPZero, ISD::SETOEQ);
  SDValue RHSIsZero = DAG.getSetCC(DL, CCVT, RHS, FPZero, ISD::SETOEQ);
  SDValue BothZero = DAG.getNode(ISD::AND, DL, CCVT, LHSIsZero, RHSIsZero);

  // With both operands known to be zeros, the integer view of LHS tells the
  // two zeros apart: it compares equal to 0 for +0.0 and unequal for -0.0.
  // NOTE(review): assumes State.IntValue contains LHS's sign bit and that all
  // of its other bits are clear when LHS is a zero -- confirm against
  // SelectionDAG::getSignAsIntValue.
  FloatSignAsInt State;
  DAG.getSignAsIntValue(State, DL, LHS);
  EVT IntVT = State.IntValue.getValueType();
  SDValue LHSIsPreferredZero =
      DAG.getSetCC(DL, CCVT, State.IntValue, DAG.getConstant(0, DL, IntVT),
                   IsMax ? ISD::SETEQ : ISD::SETNE);

  // Maximum: take LHS if it is +0.0, otherwise RHS (+0.0 or -0.0).
  // Minimum: take LHS if it is -0.0, otherwise RHS (-0.0 or +0.0).
  SDValue PreferredZero =
      DAG.getSelect(DL, VT, LHSIsPreferredZero, LHS, RHS, Flags);
  return DAG.getSelect(DL, VT, BothZero, PreferredZero, MinMax, Flags);
}
8578+
85628579
SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
85638580
SelectionDAG &DAG) const {
85648581
if (SDValue Expanded = expandVectorNaryOpBySplitting(N, DAG))
@@ -8609,18 +8626,9 @@ SDValue TargetLowering::expandFMINIMUM_FMAXIMUM(SDNode *N,
86098626

86108627
// fminimum/fmaximum requires -0.0 less than +0.0
86118628
if (!MinMaxMustRespectOrderedZero && !N->getFlags().hasNoSignedZeros() &&
8612-
!DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS)) {
8613-
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8614-
DAG.getConstantFP(0.0, DL, VT), ISD::SETOEQ);
8615-
FloatSignAsInt State;
8616-
DAG.getSignAsIntValue(State, DL, LHS);
8617-
SDValue IsSpecificZero =
8618-
DAG.getSetCC(DL, CCVT, State.IntValue,
8619-
DAG.getConstant(0, DL, State.IntValue.getValueType()),
8620-
IsMax ? ISD::SETEQ : ISD::SETNE);
8621-
SDValue Sel = DAG.getSelect(DL, VT, IsSpecificZero, LHS, RHS, Flags);
8622-
MinMax = DAG.getSelect(DL, VT, IsZero, Sel, MinMax, Flags);
8623-
}
8629+
!DAG.isKnownNeverZeroFloat(RHS) && !DAG.isKnownNeverZeroFloat(LHS))
8630+
return emitSignedZeroOrdering(DAG, IsMax, MinMax, LHS, RHS, CCVT, Flags,
8631+
DL);
86248632

86258633
return MinMax;
86268634
}
@@ -8697,17 +8705,7 @@ SDValue TargetLowering::expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *Node,
86978705
DAG.isKnownNeverZeroFloat(LHS) || DAG.isKnownNeverZeroFloat(RHS)) {
86988706
return MinMax;
86998707
}
8700-
SDValue TestZero =
8701-
DAG.getTargetConstant(IsMax ? fcPosZero : fcNegZero, DL, MVT::i32);
8702-
SDValue IsZero = DAG.getSetCC(DL, CCVT, MinMax,
8703-
DAG.getConstantFP(0.0, DL, VT), ISD::SETEQ);
8704-
SDValue LCmp = DAG.getSelect(
8705-
DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, LHS, TestZero), LHS,
8706-
MinMax, Flags);
8707-
SDValue RCmp = DAG.getSelect(
8708-
DL, VT, DAG.getNode(ISD::IS_FPCLASS, DL, CCVT, RHS, TestZero), RHS, LCmp,
8709-
Flags);
8710-
return DAG.getSelect(DL, VT, IsZero, RCmp, MinMax, Flags);
8708+
return emitSignedZeroOrdering(DAG, IsMax, MinMax, LHS, RHS, CCVT, Flags, DL);
87118709
}
87128710

87138711
/// Returns a true value if this FPClassTest can be performed with an ordered

llvm/test/CodeGen/AMDGPU/fmax3-maximumnum.ll

Lines changed: 312 additions & 433 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/fmin3-minimumnum.ll

Lines changed: 354 additions & 480 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/maximumnum.bf16.ll

Lines changed: 7782 additions & 9469 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AMDGPU/minimumnum.bf16.ll

Lines changed: 8103 additions & 9855 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/Mips/fp-maximumnum-minimumnum.ll

Lines changed: 30 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,16 @@ define float @maximumnum_float(float %x, float %y) {
1717
;
1818
; MIPS64R2-LABEL: maximumnum_float:
1919
; MIPS64R2: # %bb.0:
20+
; MIPS64R2-NEXT: mov.s $f0, $f13
2021
; MIPS64R2-NEXT: c.un.s $f12, $f12
2122
; MIPS64R2-NEXT: movt.s $f12, $f13, $fcc0
2223
; MIPS64R2-NEXT: c.un.s $f13, $f13
23-
; MIPS64R2-NEXT: movt.s $f13, $f12, $fcc0
24-
; MIPS64R2-NEXT: c.ule.s $f12, $f13
25-
; MIPS64R2-NEXT: mov.s $f0, $f13
26-
; MIPS64R2-NEXT: movf.s $f0, $f12, $fcc0
24+
; MIPS64R2-NEXT: movt.s $f0, $f12, $fcc0
2725
; MIPS64R2-NEXT: mfc1 $1, $f12
2826
; MIPS64R2-NEXT: mov.s $f1, $f0
2927
; MIPS64R2-NEXT: movz.s $f1, $f12, $1
30-
; MIPS64R2-NEXT: mfc1 $1, $f13
31-
; MIPS64R2-NEXT: movz.s $f1, $f13, $1
28+
; MIPS64R2-NEXT: c.ule.s $f12, $f0
29+
; MIPS64R2-NEXT: movf.s $f0, $f12, $fcc0
3230
; MIPS64R2-NEXT: mtc1 $zero, $f2
3331
; MIPS64R2-NEXT: c.eq.s $f0, $f2
3432
; MIPS64R2-NEXT: jr $ra
@@ -67,14 +65,12 @@ define float @maximumnum_float_nnan(float %x, float %y) {
6765
;
6866
; MIPS64R2-LABEL: maximumnum_float_nnan:
6967
; MIPS64R2: # %bb.0:
70-
; MIPS64R2-NEXT: c.ule.s $f12, $f13
7168
; MIPS64R2-NEXT: mov.s $f0, $f13
72-
; MIPS64R2-NEXT: movf.s $f0, $f12, $fcc0
7369
; MIPS64R2-NEXT: mfc1 $1, $f12
74-
; MIPS64R2-NEXT: mov.s $f1, $f0
70+
; MIPS64R2-NEXT: mov.s $f1, $f13
7571
; MIPS64R2-NEXT: movz.s $f1, $f12, $1
76-
; MIPS64R2-NEXT: mfc1 $1, $f13
77-
; MIPS64R2-NEXT: movz.s $f1, $f13, $1
72+
; MIPS64R2-NEXT: c.ule.s $f12, $f13
73+
; MIPS64R2-NEXT: movf.s $f0, $f12, $fcc0
7874
; MIPS64R2-NEXT: mtc1 $zero, $f2
7975
; MIPS64R2-NEXT: c.eq.s $f0, $f2
8076
; MIPS64R2-NEXT: jr $ra
@@ -94,18 +90,16 @@ define double @maximumnum_double(double %x, double %y) {
9490
;
9591
; MIPS64R2-LABEL: maximumnum_double:
9692
; MIPS64R2: # %bb.0:
93+
; MIPS64R2-NEXT: mov.d $f0, $f13
9794
; MIPS64R2-NEXT: c.un.d $f12, $f12
9895
; MIPS64R2-NEXT: movt.d $f12, $f13, $fcc0
9996
; MIPS64R2-NEXT: c.un.d $f13, $f13
100-
; MIPS64R2-NEXT: movt.d $f13, $f12, $fcc0
101-
; MIPS64R2-NEXT: c.ule.d $f12, $f13
102-
; MIPS64R2-NEXT: mov.d $f0, $f13
103-
; MIPS64R2-NEXT: movf.d $f0, $f12, $fcc0
97+
; MIPS64R2-NEXT: movt.d $f0, $f12, $fcc0
10498
; MIPS64R2-NEXT: dmfc1 $1, $f12
10599
; MIPS64R2-NEXT: mov.d $f1, $f0
106100
; MIPS64R2-NEXT: movz.d $f1, $f12, $1
107-
; MIPS64R2-NEXT: dmfc1 $1, $f13
108-
; MIPS64R2-NEXT: movz.d $f1, $f13, $1
101+
; MIPS64R2-NEXT: c.ule.d $f12, $f0
102+
; MIPS64R2-NEXT: movf.d $f0, $f12, $fcc0
109103
; MIPS64R2-NEXT: dmtc1 $zero, $f2
110104
; MIPS64R2-NEXT: c.eq.d $f0, $f2
111105
; MIPS64R2-NEXT: jr $ra
@@ -144,14 +138,12 @@ define double @maximumnum_double_nnan(double %x, double %y) {
144138
;
145139
; MIPS64R2-LABEL: maximumnum_double_nnan:
146140
; MIPS64R2: # %bb.0:
147-
; MIPS64R2-NEXT: c.ule.d $f12, $f13
148141
; MIPS64R2-NEXT: mov.d $f0, $f13
149-
; MIPS64R2-NEXT: movf.d $f0, $f12, $fcc0
150142
; MIPS64R2-NEXT: dmfc1 $1, $f12
151-
; MIPS64R2-NEXT: mov.d $f1, $f0
143+
; MIPS64R2-NEXT: mov.d $f1, $f13
152144
; MIPS64R2-NEXT: movz.d $f1, $f12, $1
153-
; MIPS64R2-NEXT: dmfc1 $1, $f13
154-
; MIPS64R2-NEXT: movz.d $f1, $f13, $1
145+
; MIPS64R2-NEXT: c.ule.d $f12, $f13
146+
; MIPS64R2-NEXT: movf.d $f0, $f12, $fcc0
155147
; MIPS64R2-NEXT: dmtc1 $zero, $f2
156148
; MIPS64R2-NEXT: c.eq.d $f0, $f2
157149
; MIPS64R2-NEXT: jr $ra
@@ -170,21 +162,16 @@ define float @minimumnum_float(float %x, float %y) {
170162
;
171163
; MIPS64R2-LABEL: minimumnum_float:
172164
; MIPS64R2: # %bb.0:
165+
; MIPS64R2-NEXT: mov.s $f0, $f13
173166
; MIPS64R2-NEXT: c.un.s $f12, $f12
174167
; MIPS64R2-NEXT: movt.s $f12, $f13, $fcc0
175168
; MIPS64R2-NEXT: c.un.s $f13, $f13
176-
; MIPS64R2-NEXT: movt.s $f13, $f12, $fcc0
177-
; MIPS64R2-NEXT: c.olt.s $f12, $f13
178-
; MIPS64R2-NEXT: mov.s $f0, $f13
179169
; MIPS64R2-NEXT: movt.s $f0, $f12, $fcc0
180170
; MIPS64R2-NEXT: mfc1 $1, $f12
181-
; MIPS64R2-NEXT: lui $2, 32768
182-
; MIPS64R2-NEXT: xor $1, $1, $2
183171
; MIPS64R2-NEXT: mov.s $f1, $f0
184-
; MIPS64R2-NEXT: movz.s $f1, $f12, $1
185-
; MIPS64R2-NEXT: mfc1 $1, $f13
186-
; MIPS64R2-NEXT: xor $1, $1, $2
187-
; MIPS64R2-NEXT: movz.s $f1, $f13, $1
172+
; MIPS64R2-NEXT: movn.s $f1, $f12, $1
173+
; MIPS64R2-NEXT: c.olt.s $f12, $f0
174+
; MIPS64R2-NEXT: movt.s $f0, $f12, $fcc0
188175
; MIPS64R2-NEXT: mtc1 $zero, $f2
189176
; MIPS64R2-NEXT: c.eq.s $f0, $f2
190177
; MIPS64R2-NEXT: jr $ra
@@ -223,17 +210,12 @@ define float @minimumnum_float_nnan(float %x, float %y) {
223210
;
224211
; MIPS64R2-LABEL: minimumnum_float_nnan:
225212
; MIPS64R2: # %bb.0:
226-
; MIPS64R2-NEXT: c.olt.s $f12, $f13
227213
; MIPS64R2-NEXT: mov.s $f0, $f13
228-
; MIPS64R2-NEXT: movt.s $f0, $f12, $fcc0
229214
; MIPS64R2-NEXT: mfc1 $1, $f12
230-
; MIPS64R2-NEXT: lui $2, 32768
231-
; MIPS64R2-NEXT: xor $1, $1, $2
232-
; MIPS64R2-NEXT: mov.s $f1, $f0
233-
; MIPS64R2-NEXT: movz.s $f1, $f12, $1
234-
; MIPS64R2-NEXT: mfc1 $1, $f13
235-
; MIPS64R2-NEXT: xor $1, $1, $2
236-
; MIPS64R2-NEXT: movz.s $f1, $f13, $1
215+
; MIPS64R2-NEXT: mov.s $f1, $f13
216+
; MIPS64R2-NEXT: movn.s $f1, $f12, $1
217+
; MIPS64R2-NEXT: c.olt.s $f12, $f13
218+
; MIPS64R2-NEXT: movt.s $f0, $f12, $fcc0
237219
; MIPS64R2-NEXT: mtc1 $zero, $f2
238220
; MIPS64R2-NEXT: c.eq.s $f0, $f2
239221
; MIPS64R2-NEXT: jr $ra
@@ -252,22 +234,16 @@ define double @minimumnum_double(double %x, double %y) {
252234
;
253235
; MIPS64R2-LABEL: minimumnum_double:
254236
; MIPS64R2: # %bb.0:
237+
; MIPS64R2-NEXT: mov.d $f0, $f13
255238
; MIPS64R2-NEXT: c.un.d $f12, $f12
256239
; MIPS64R2-NEXT: movt.d $f12, $f13, $fcc0
257240
; MIPS64R2-NEXT: c.un.d $f13, $f13
258-
; MIPS64R2-NEXT: movt.d $f13, $f12, $fcc0
259-
; MIPS64R2-NEXT: c.olt.d $f12, $f13
260-
; MIPS64R2-NEXT: mov.d $f0, $f13
261241
; MIPS64R2-NEXT: movt.d $f0, $f12, $fcc0
262242
; MIPS64R2-NEXT: dmfc1 $1, $f12
263-
; MIPS64R2-NEXT: daddiu $2, $zero, 1
264-
; MIPS64R2-NEXT: dsll $2, $2, 63
265-
; MIPS64R2-NEXT: xor $1, $1, $2
266243
; MIPS64R2-NEXT: mov.d $f1, $f0
267-
; MIPS64R2-NEXT: movz.d $f1, $f12, $1
268-
; MIPS64R2-NEXT: dmfc1 $1, $f13
269-
; MIPS64R2-NEXT: xor $1, $1, $2
270-
; MIPS64R2-NEXT: movz.d $f1, $f13, $1
244+
; MIPS64R2-NEXT: movn.d $f1, $f12, $1
245+
; MIPS64R2-NEXT: c.olt.d $f12, $f0
246+
; MIPS64R2-NEXT: movt.d $f0, $f12, $fcc0
271247
; MIPS64R2-NEXT: dmtc1 $zero, $f2
272248
; MIPS64R2-NEXT: c.eq.d $f0, $f2
273249
; MIPS64R2-NEXT: jr $ra
@@ -306,18 +282,12 @@ define double @minimumnum_double_nnan(double %x, double %y) {
306282
;
307283
; MIPS64R2-LABEL: minimumnum_double_nnan:
308284
; MIPS64R2: # %bb.0:
309-
; MIPS64R2-NEXT: c.olt.d $f12, $f13
310285
; MIPS64R2-NEXT: mov.d $f0, $f13
286+
; MIPS64R2-NEXT: dmfc1 $1, $f12
287+
; MIPS64R2-NEXT: mov.d $f1, $f13
288+
; MIPS64R2-NEXT: movn.d $f1, $f12, $1
289+
; MIPS64R2-NEXT: c.olt.d $f12, $f13
311290
; MIPS64R2-NEXT: movt.d $f0, $f12, $fcc0
312-
; MIPS64R2-NEXT: daddiu $1, $zero, 1
313-
; MIPS64R2-NEXT: dsll $1, $1, 63
314-
; MIPS64R2-NEXT: dmfc1 $2, $f12
315-
; MIPS64R2-NEXT: xor $2, $2, $1
316-
; MIPS64R2-NEXT: mov.d $f1, $f0
317-
; MIPS64R2-NEXT: movz.d $f1, $f12, $2
318-
; MIPS64R2-NEXT: dmfc1 $2, $f13
319-
; MIPS64R2-NEXT: xor $1, $2, $1
320-
; MIPS64R2-NEXT: movz.d $f1, $f13, $1
321291
; MIPS64R2-NEXT: dmtc1 $zero, $f2
322292
; MIPS64R2-NEXT: c.eq.d $f0, $f2
323293
; MIPS64R2-NEXT: jr $ra

0 commit comments

Comments
 (0)