Skip to content

Commit c784fd8

Browse files
committed
Combining umax, umin, smin, smax
1 parent f8575c9 commit c784fd8

File tree

2 files changed

+235
-29
lines changed

2 files changed

+235
-29
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10087,40 +10087,43 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
1008710087
return Combined;
1008810088

1008910089
// fold (xor (smin(x, C), C)) -> select (x < C), xor(x, C), 0
10090-
// fold (xor (smin(C, x), C)) -> select (x < C), xor(x, C), 0
10091-
if (N0.getOpcode() == ISD::SMIN && N0.hasOneUse()) {
10092-
SDValue Op0 = N0.getOperand(0);
10093-
SDValue Op1 = N0.getOperand(1);
10094-
10095-
if (Op1 != N1) {
10096-
std::swap(Op0, Op1);
10097-
}
10098-
10099-
if (Op1 == N1) {
10100-
if (isa<ConstantSDNode>(N1)) {
10101-
EVT CCVT = getSetCCResultType(VT);
10102-
SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETLT);
10103-
SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1);
10104-
SDValue Zero = DAG.getConstant(0, SDLoc(N), VT);
10105-
return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero);
10106-
}
10107-
}
10108-
}
10109-
1011010090
// fold (xor (smax(x, C), C)) -> select (x > C), xor(x, C), 0
10111-
// fold (xor (smax(C, x), C)) -> select (x > C), xor(x, C), 0
10112-
if (N0.getOpcode() == ISD::SMAX && N0.hasOneUse()) {
10091+
// fold (xor (umin(x, C), C)) -> select (x u< C), xor(x, C), 0
10092+
// fold (xor (umax(x, C), C)) -> select (x u> C), xor(x, C), 0
10093+
if ((N0.getOpcode() == ISD::SMIN || N0.getOpcode() == ISD::SMAX ||
10094+
N0.getOpcode() == ISD::UMIN || N0.getOpcode() == ISD::UMAX) &&
10095+
N0.hasOneUse()) {
1011310096
SDValue Op0 = N0.getOperand(0);
1011410097
SDValue Op1 = N0.getOperand(1);
1011510098

10116-
if (Op1 != N1) {
10117-
std::swap(Op0, Op1);
10118-
}
10119-
1012010099
if (Op1 == N1) {
10121-
if (isa<ConstantSDNode>(N1)) {
10100+
if (isa<ConstantSDNode>(N1) ||
10101+
ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) {
10102+
// For vectors, only optimize when the constant is a splat of zero or
10103+
// all-ones, to avoid generating more instructions.
10104+
if (VT.isVector()) {
10105+
ConstantSDNode *N1C = isConstOrConstSplat(N1);
10106+
if (!N1C || (!N1C->isZero() && !N1C->isAllOnes()))
10107+
return SDValue();
10108+
}
10109+
1012210110
EVT CCVT = getSetCCResultType(VT);
10123-
SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, ISD::SETGT);
10111+
ISD::CondCode CC;
10112+
switch (N0.getOpcode()) {
10113+
case ISD::SMIN:
10114+
CC = ISD::SETLT;
10115+
break;
10116+
case ISD::SMAX:
10117+
CC = ISD::SETGT;
10118+
break;
10119+
case ISD::UMIN:
10120+
CC = ISD::SETULT;
10121+
break;
10122+
case ISD::UMAX:
10123+
CC = ISD::SETUGT;
10124+
break;
10125+
}
10126+
SDValue Cmp = DAG.getSetCC(SDLoc(N), CCVT, Op0, N1, CC);
1012410127
SDValue XorXC = DAG.getNode(ISD::XOR, SDLoc(N), VT, Op0, N1);
1012510128
SDValue Zero = DAG.getConstant(0, SDLoc(N), VT);
1012610129
return DAG.getSelect(SDLoc(N), VT, Cmp, XorXC, Zero);

llvm/test/CodeGen/AArch64/xor-smin-smax.ll

Lines changed: 204 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,5 +71,208 @@ define i64 @test_smax_constant(i64 %a) {
7171
ret i64 %retval.0
7272
}
7373

74+
define i64 @test_umin_neg_one(i64 %a) {
75+
; CHECK-LABEL: test_umin_neg_one:
76+
; CHECK: // %bb.0:
77+
; CHECK-NEXT: mvn x0, x0
78+
; CHECK-NEXT: ret
79+
%1 = tail call i64 @llvm.umin.i64(i64 %a, i64 -1)
80+
%retval.0 = xor i64 %1, -1
81+
ret i64 %retval.0
82+
}
83+
84+
define i64 @test_umin_zero(i64 %a) {
85+
; CHECK-LABEL: test_umin_zero:
86+
; CHECK: // %bb.0:
87+
; CHECK-NEXT: mov x0, xzr
88+
; CHECK-NEXT: ret
89+
%1 = tail call i64 @llvm.umin.i64(i64 %a, i64 0)
90+
%retval.0 = xor i64 %1, 0
91+
ret i64 %retval.0
92+
}
93+
94+
define i64 @test_umin_constant(i64 %a) {
95+
; CHECK-LABEL: test_umin_constant:
96+
; CHECK: // %bb.0:
97+
; CHECK-NEXT: eor x8, x0, #0x8
98+
; CHECK-NEXT: cmp x0, #8
99+
; CHECK-NEXT: csel x0, x8, xzr, lo
100+
; CHECK-NEXT: ret
101+
%1 = tail call i64 @llvm.umin.i64(i64 %a, i64 8)
102+
%retval.0 = xor i64 %1, 8
103+
ret i64 %retval.0
104+
}
105+
106+
define i64 @test_umax_neg_one(i64 %a) {
107+
; CHECK-LABEL: test_umax_neg_one:
108+
; CHECK: // %bb.0:
109+
; CHECK-NEXT: mov x0, xzr
110+
; CHECK-NEXT: ret
111+
%1 = tail call i64 @llvm.umax.i64(i64 %a, i64 -1)
112+
%retval.0 = xor i64 %1, -1
113+
ret i64 %retval.0
114+
}
115+
116+
define i64 @test_umax_zero(i64 %a) {
117+
; CHECK-LABEL: test_umax_zero:
118+
; CHECK: // %bb.0:
119+
; CHECK-NEXT: ret
120+
%1 = tail call i64 @llvm.umax.i64(i64 %a, i64 0)
121+
%retval.0 = xor i64 %1, 0
122+
ret i64 %retval.0
123+
}
124+
125+
define i64 @test_umax_constant(i64 %a) {
126+
; CHECK-LABEL: test_umax_constant:
127+
; CHECK: // %bb.0:
128+
; CHECK-NEXT: eor x8, x0, #0x8
129+
; CHECK-NEXT: cmp x0, #8
130+
; CHECK-NEXT: csel x0, x8, xzr, hi
131+
; CHECK-NEXT: ret
132+
%1 = tail call i64 @llvm.umax.i64(i64 %a, i64 8)
133+
%retval.0 = xor i64 %1, 8
134+
ret i64 %retval.0
135+
}
136+
137+
; Test vector cases
138+
139+
define <4 x i32> @test_smin_vector_neg_one(<4 x i32> %a) {
140+
; CHECK-LABEL: test_smin_vector_neg_one:
141+
; CHECK: // %bb.0:
142+
; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
143+
; CHECK-NEXT: cmgt v1.4s, v1.4s, v0.4s
144+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
145+
; CHECK-NEXT: ret
146+
%1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
147+
%retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
148+
ret <4 x i32> %retval.0
149+
}
150+
151+
define <4 x i32> @test_smin_vector_zero(<4 x i32> %a) {
152+
; CHECK-LABEL: test_smin_vector_zero:
153+
; CHECK: // %bb.0:
154+
; CHECK-NEXT: movi v1.2d, #0000000000000000
155+
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
156+
; CHECK-NEXT: ret
157+
%1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
158+
%retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
159+
ret <4 x i32> %retval.0
160+
}
161+
162+
define <4 x i32> @test_smin_vector_constant(<4 x i32> %a) {
163+
; CHECK-LABEL: test_smin_vector_constant:
164+
; CHECK: // %bb.0:
165+
; CHECK-NEXT: movi v1.4s, #8
166+
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
167+
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
168+
; CHECK-NEXT: ret
169+
%1 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
170+
%retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
171+
ret <4 x i32> %retval.0
172+
}
173+
174+
define <4 x i32> @test_smax_vector_neg_one(<4 x i32> %a) {
175+
; CHECK-LABEL: test_smax_vector_neg_one:
176+
; CHECK: // %bb.0:
177+
; CHECK-NEXT: cmge v1.4s, v0.4s, #0
178+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
179+
; CHECK-NEXT: ret
180+
%1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
181+
%retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
182+
ret <4 x i32> %retval.0
183+
}
184+
185+
define <4 x i32> @test_smax_vector_zero(<4 x i32> %a) {
186+
; CHECK-LABEL: test_smax_vector_zero:
187+
; CHECK: // %bb.0:
188+
; CHECK-NEXT: movi v1.2d, #0000000000000000
189+
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
190+
; CHECK-NEXT: ret
191+
%1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
192+
%retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
193+
ret <4 x i32> %retval.0
194+
}
195+
196+
define <4 x i32> @test_smax_vector_constant(<4 x i32> %a) {
197+
; CHECK-LABEL: test_smax_vector_constant:
198+
; CHECK: // %bb.0:
199+
; CHECK-NEXT: movi v1.4s, #8
200+
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
201+
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
202+
; CHECK-NEXT: ret
203+
%1 = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
204+
%retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
205+
ret <4 x i32> %retval.0
206+
}
207+
208+
define <4 x i32> @test_umin_vector_neg_one(<4 x i32> %a) {
209+
; CHECK-LABEL: test_umin_vector_neg_one:
210+
; CHECK: // %bb.0:
211+
; CHECK-NEXT: mvn v0.16b, v0.16b
212+
; CHECK-NEXT: ret
213+
%1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
214+
%retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
215+
ret <4 x i32> %retval.0
216+
}
217+
218+
define <4 x i32> @test_umin_vector_zero(<4 x i32> %a) {
219+
; CHECK-LABEL: test_umin_vector_zero:
220+
; CHECK: // %bb.0:
221+
; CHECK-NEXT: movi v0.2d, #0000000000000000
222+
; CHECK-NEXT: ret
223+
%1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
224+
%retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
225+
ret <4 x i32> %retval.0
226+
}
227+
228+
define <4 x i32> @test_umin_vector_constant(<4 x i32> %a) {
229+
; CHECK-LABEL: test_umin_vector_constant:
230+
; CHECK: // %bb.0:
231+
; CHECK-NEXT: movi v1.4s, #8
232+
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
233+
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
234+
; CHECK-NEXT: ret
235+
%1 = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
236+
%retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
237+
ret <4 x i32> %retval.0
238+
}
239+
240+
define <4 x i32> @test_umax_vector_neg_one(<4 x i32> %a) {
241+
; CHECK-LABEL: test_umax_vector_neg_one:
242+
; CHECK: // %bb.0:
243+
; CHECK-NEXT: movi v0.2d, #0000000000000000
244+
; CHECK-NEXT: ret
245+
%1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
246+
%retval.0 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
247+
ret <4 x i32> %retval.0
248+
}
249+
250+
define <4 x i32> @test_umax_vector_zero(<4 x i32> %a) {
251+
; CHECK-LABEL: test_umax_vector_zero:
252+
; CHECK: // %bb.0:
253+
; CHECK-NEXT: ret
254+
%1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
255+
%retval.0 = xor <4 x i32> %1, <i32 0, i32 0, i32 0, i32 0>
256+
ret <4 x i32> %retval.0
257+
}
258+
259+
define <4 x i32> @test_umax_vector_constant(<4 x i32> %a) {
260+
; CHECK-LABEL: test_umax_vector_constant:
261+
; CHECK: // %bb.0:
262+
; CHECK-NEXT: movi v1.4s, #8
263+
; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
264+
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
265+
; CHECK-NEXT: ret
266+
%1 = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> <i32 8, i32 8, i32 8, i32 8>)
267+
%retval.0 = xor <4 x i32> %1, <i32 8, i32 8, i32 8, i32 8>
268+
ret <4 x i32> %retval.0
269+
}
270+
74271
declare i64 @llvm.smin.i64(i64, i64)
75-
declare i64 @llvm.smax.i64(i64, i64)
272+
declare i64 @llvm.smax.i64(i64, i64)
273+
declare i64 @llvm.umin.i64(i64, i64)
274+
declare i64 @llvm.umax.i64(i64, i64)
275+
declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
276+
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
277+
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
278+
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)

0 commit comments

Comments
 (0)