Skip to content

Commit bcb4984

Browse files
committed
[X86] select-smin-smax.ll - add i128 tests
Helps check the quality of legalization codegen (previously all we had was x86 i64 handling)
1 parent 263e458 commit bcb4984

File tree

2 files changed

+127
-36
lines changed

2 files changed

+127
-36
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 21 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ namespace {
401401
SDValue PromoteExtend(SDValue Op);
402402
bool PromoteLoad(SDValue Op);
403403

404-
SDValue foldShiftToAvg(SDNode *N);
404+
SDValue foldShiftToAvg(SDNode *N, const SDLoc &DL);
405405
// Fold `a bitwiseop (~b +/- c)` -> `a bitwiseop ~(b -/+ c)`
406406
SDValue foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT);
407407

@@ -10983,7 +10983,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
1098310983
if (SDValue NarrowLoad = reduceLoadWidth(N))
1098410984
return NarrowLoad;
1098510985

10986-
if (SDValue AVG = foldShiftToAvg(N))
10986+
if (SDValue AVG = foldShiftToAvg(N, DL))
1098710987
return AVG;
1098810988

1098910989
return SDValue();
@@ -11256,7 +11256,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
1125611256
if (SDValue MULH = combineShiftToMULH(N, DL, DAG, TLI))
1125711257
return MULH;
1125811258

11259-
if (SDValue AVG = foldShiftToAvg(N))
11259+
if (SDValue AVG = foldShiftToAvg(N, DL))
1126011260
return AVG;
1126111261

1126211262
return SDValue();
@@ -11772,51 +11772,36 @@ static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
1177211772
}
1177311773
}
1177411774

11775-
SDValue DAGCombiner::foldShiftToAvg(SDNode *N) {
11775+
// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
11776+
SDValue DAGCombiner::foldShiftToAvg(SDNode *N, const SDLoc &DL) {
1177611777
const unsigned Opcode = N->getOpcode();
11777-
11778-
// Convert (sr[al] (add n[su]w x, y)) -> (avgfloor[su] x, y)
1177911778
if (Opcode != ISD::SRA && Opcode != ISD::SRL)
1178011779
return SDValue();
1178111780

11782-
unsigned FloorISD = 0;
11783-
auto VT = N->getValueType(0);
11784-
bool IsUnsigned = false;
11785-
11786-
// Decide wether signed or unsigned.
11787-
switch (Opcode) {
11788-
case ISD::SRA:
11789-
if (!hasOperation(ISD::AVGFLOORS, VT))
11790-
return SDValue();
11791-
FloorISD = ISD::AVGFLOORS;
11792-
break;
11793-
case ISD::SRL:
11794-
IsUnsigned = true;
11795-
if (!hasOperation(ISD::AVGFLOORU, VT))
11796-
return SDValue();
11797-
FloorISD = ISD::AVGFLOORU;
11798-
break;
11799-
default:
11800-
return SDValue();
11801-
}
11781+
EVT VT = N->getValueType(0);
11782+
bool IsUnsigned = Opcode == ISD::SRL;
1180211783

1180311784
// Captured values.
1180411785
SDValue A, B, Add;
1180511786

1180611787
// Match floor average as it is common to both floor/ceil avgs.
11807-
if (!sd_match(N, m_BinOp(Opcode,
11808-
m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11809-
m_One())))
11810-
return SDValue();
11788+
if (sd_match(N, m_BinOp(Opcode,
11789+
m_AllOf(m_Value(Add), m_Add(m_Value(A), m_Value(B))),
11790+
m_One()))) {
11791+
// Decide whether signed or unsigned.
11792+
unsigned FloorISD = IsUnsigned ? ISD::AVGFLOORU : ISD::AVGFLOORS;
11793+
if (!hasOperation(FloorISD, VT))
11794+
return SDValue();
1181111795

11812-
// Can't optimize adds that may wrap.
11813-
if (IsUnsigned && !Add->getFlags().hasNoUnsignedWrap())
11814-
return SDValue();
11796+
// Can't optimize adds that may wrap.
11797+
if ((IsUnsigned && !Add->getFlags().hasNoUnsignedWrap()) ||
11798+
(!IsUnsigned && !Add->getFlags().hasNoSignedWrap()))
11799+
return SDValue();
1181511800

11816-
if (!IsUnsigned && !Add->getFlags().hasNoSignedWrap())
11817-
return SDValue();
11801+
return DAG.getNode(FloorISD, DL, N->getValueType(0), {A, B});
11802+
}
1181811803

11819-
return DAG.getNode(FloorISD, SDLoc(N), N->getValueType(0), {A, B});
11804+
return SDValue();
1182011805
}
1182111806

1182211807
SDValue DAGCombiner::foldBitwiseOpWithNeg(SDNode *N, const SDLoc &DL, EVT VT) {

llvm/test/CodeGen/X86/select-smin-smax.ll

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ declare i32 @llvm.smax.i32(i32, i32)
1212
declare i32 @llvm.smin.i32(i32, i32)
1313
declare i64 @llvm.smax.i64(i64, i64)
1414
declare i64 @llvm.smin.i64(i64, i64)
15+
declare i128 @llvm.smax.i128(i128, i128)
16+
declare i128 @llvm.smin.i128(i128, i128)
1517

1618
define i8 @test_i8_smax(i8 %a) nounwind {
1719
; X64-LABEL: test_i8_smax:
@@ -259,3 +261,107 @@ define i64 @test_i64_smin(i64 %a) nounwind {
259261
%r = call i64 @llvm.smin.i64(i64 %a, i64 0)
260262
ret i64 %r
261263
}
264+
265+
define i128 @test_i128_smax(i128 %a) nounwind {
266+
; X64-LABEL: test_i128_smax:
267+
; X64: # %bb.0:
268+
; X64-NEXT: movq %rdi, %rax
269+
; X64-NEXT: xorl %edx, %edx
270+
; X64-NEXT: testq %rsi, %rsi
271+
; X64-NEXT: cmovsq %rdx, %rax
272+
; X64-NEXT: cmovgq %rsi, %rdx
273+
; X64-NEXT: retq
274+
;
275+
; X86-BMI-LABEL: test_i128_smax:
276+
; X86-BMI: # %bb.0:
277+
; X86-BMI-NEXT: pushl %edi
278+
; X86-BMI-NEXT: pushl %esi
279+
; X86-BMI-NEXT: pushl %eax
280+
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
281+
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
282+
; X86-BMI-NEXT: xorl %edx, %edx
283+
; X86-BMI-NEXT: testl %ecx, %ecx
284+
; X86-BMI-NEXT: cmovlel %edx, %ecx
285+
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
286+
; X86-BMI-NEXT: cmovsl %edx, %esi
287+
; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
288+
; X86-BMI-NEXT: cmovsl %edx, %edi
289+
; X86-BMI-NEXT: cmovnsl {{[0-9]+}}(%esp), %edx
290+
; X86-BMI-NEXT: movl %ecx, 12(%eax)
291+
; X86-BMI-NEXT: movl %edx, 8(%eax)
292+
; X86-BMI-NEXT: movl %edi, 4(%eax)
293+
; X86-BMI-NEXT: movl %esi, (%eax)
294+
; X86-BMI-NEXT: addl $4, %esp
295+
; X86-BMI-NEXT: popl %esi
296+
; X86-BMI-NEXT: popl %edi
297+
; X86-BMI-NEXT: retl $4
298+
;
299+
; X86-NOBMI-LABEL: test_i128_smax:
300+
; X86-NOBMI: # %bb.0:
301+
; X86-NOBMI-NEXT: pushl %edi
302+
; X86-NOBMI-NEXT: pushl %esi
303+
; X86-NOBMI-NEXT: pushl %eax
304+
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax
305+
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
306+
; X86-NOBMI-NEXT: testl %ecx, %ecx
307+
; X86-NOBMI-NEXT: movl $0, %edx
308+
; X86-NOBMI-NEXT: movl $0, %esi
309+
; X86-NOBMI-NEXT: movl $0, %edi
310+
; X86-NOBMI-NEXT: js .LBB8_2
311+
; X86-NOBMI-NEXT: # %bb.1:
312+
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edi
313+
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx
314+
; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %esi
315+
; X86-NOBMI-NEXT: .LBB8_2:
316+
; X86-NOBMI-NEXT: jg .LBB8_4
317+
; X86-NOBMI-NEXT: # %bb.3:
318+
; X86-NOBMI-NEXT: xorl %ecx, %ecx
319+
; X86-NOBMI-NEXT: .LBB8_4:
320+
; X86-NOBMI-NEXT: movl %ecx, 12(%eax)
321+
; X86-NOBMI-NEXT: movl %edi, 8(%eax)
322+
; X86-NOBMI-NEXT: movl %esi, 4(%eax)
323+
; X86-NOBMI-NEXT: movl %edx, (%eax)
324+
; X86-NOBMI-NEXT: addl $4, %esp
325+
; X86-NOBMI-NEXT: popl %esi
326+
; X86-NOBMI-NEXT: popl %edi
327+
; X86-NOBMI-NEXT: retl $4
328+
%r = call i128 @llvm.smax.i128(i128 %a, i128 0)
329+
ret i128 %r
330+
}
331+
332+
define i128 @test_i128_smin(i128 %a) nounwind {
333+
; X64-LABEL: test_i128_smin:
334+
; X64: # %bb.0:
335+
; X64-NEXT: movq %rdi, %rax
336+
; X64-NEXT: movq %rsi, %rdx
337+
; X64-NEXT: sarq $63, %rdx
338+
; X64-NEXT: andq %rdx, %rax
339+
; X64-NEXT: andq %rsi, %rdx
340+
; X64-NEXT: retq
341+
;
342+
; X86-LABEL: test_i128_smin:
343+
; X86: # %bb.0:
344+
; X86-NEXT: pushl %edi
345+
; X86-NEXT: pushl %esi
346+
; X86-NEXT: pushl %eax
347+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
348+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
349+
; X86-NEXT: movl %ecx, %edx
350+
; X86-NEXT: sarl $31, %edx
351+
; X86-NEXT: andl %edx, %ecx
352+
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
353+
; X86-NEXT: andl %edx, %esi
354+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
355+
; X86-NEXT: andl %edx, %edi
356+
; X86-NEXT: andl {{[0-9]+}}(%esp), %edx
357+
; X86-NEXT: movl %ecx, 12(%eax)
358+
; X86-NEXT: movl %edx, 8(%eax)
359+
; X86-NEXT: movl %edi, 4(%eax)
360+
; X86-NEXT: movl %esi, (%eax)
361+
; X86-NEXT: addl $4, %esp
362+
; X86-NEXT: popl %esi
363+
; X86-NEXT: popl %edi
364+
; X86-NEXT: retl $4
365+
%r = call i128 @llvm.smin.i128(i128 %a, i128 0)
366+
ret i128 %r
367+
}

0 commit comments

Comments
 (0)