Skip to content

Commit 77077c6

Browse files
RKSimon authored and yuxuanchen1997 committed
[X86] canCreateUndefOrPoisonForTargetNode - PMADDWD/PMADDUBSW intrinsics don't create poison
Summary: Fix regression introduced by #84924
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Differential Revision: https://phabricator.intern.facebook.com/D60250574
1 parent 0ee14de commit 77077c6

File tree

2 files changed

+16
-41
lines changed

2 files changed

+16
-41
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43328,6 +43328,16 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode(
4332843328
case X86ISD::UNPCKH:
4332943329
case X86ISD::UNPCKL:
4333043330
return false;
43331+
case ISD::INTRINSIC_WO_CHAIN:
43332+
switch (Op->getConstantOperandVal(0)) {
43333+
case Intrinsic::x86_sse2_pmadd_wd:
43334+
case Intrinsic::x86_avx2_pmadd_wd:
43335+
case Intrinsic::x86_avx512_pmaddw_d_512:
43336+
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
43337+
case Intrinsic::x86_avx2_pmadd_ub_sw:
43338+
case Intrinsic::x86_avx512_pmaddubs_w_512:
43339+
return false;
43340+
}
4333143341
}
4333243342
return TargetLowering::canCreateUndefOrPoisonForTargetNode(
4333343343
Op, DemandedElts, DAG, PoisonOnly, ConsiderFlags, Depth);

llvm/test/CodeGen/X86/combine-pmadd.ll

Lines changed: 6 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -82,10 +82,8 @@ define <8 x i32> @combine_pmaddwd_concat_freeze(<8 x i16> %a0, <8 x i16> %a1) {
8282
; AVX2-LABEL: combine_pmaddwd_concat_freeze:
8383
; AVX2: # %bb.0:
8484
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
85-
; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
8685
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
87-
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm1
88-
; AVX2-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0
86+
; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
8987
; AVX2-NEXT: retq
9088
%lo = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
9189
%hi = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
@@ -229,10 +227,8 @@ define <16 x i16> @combine_pmaddubsw_concat_freeze(<16 x i8> %a0, <16 x i8> %a1)
229227
; AVX2-LABEL: combine_pmaddubsw_concat_freeze:
230228
; AVX2: # %bb.0:
231229
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
232-
; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
233230
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
234-
; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm2, %ymm1
235-
; AVX2-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0
231+
; AVX2-NEXT: vpmaddubsw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
236232
; AVX2-NEXT: retq
237233
%lo = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
238234
%hi = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
@@ -293,41 +289,10 @@ define i32 @combine_pmaddubsw_constant_sat() {
293289

294290
; Constant folding PMADDWD was causing an infinite loop in the PCMPGT commuting between 2 constant values.
295291
define i1 @pmaddwd_pcmpgt_infinite_loop() {
296-
; SSE-LABEL: pmaddwd_pcmpgt_infinite_loop:
297-
; SSE: # %bb.0:
298-
; SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
299-
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
300-
; SSE-NEXT: movdqa %xmm0, %xmm1
301-
; SSE-NEXT: psrld $1, %xmm1
302-
; SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
303-
; SSE-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
304-
; SSE-NEXT: pand %xmm0, %xmm1
305-
; SSE-NEXT: movmskps %xmm1, %eax
306-
; SSE-NEXT: testl %eax, %eax
307-
; SSE-NEXT: sete %al
308-
; SSE-NEXT: retq
309-
;
310-
; AVX1-LABEL: pmaddwd_pcmpgt_infinite_loop:
311-
; AVX1: # %bb.0:
312-
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
313-
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
314-
; AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
315-
; AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
316-
; AVX1-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
317-
; AVX1-NEXT: vtestps %xmm1, %xmm0
318-
; AVX1-NEXT: sete %al
319-
; AVX1-NEXT: retq
320-
;
321-
; AVX2-LABEL: pmaddwd_pcmpgt_infinite_loop:
322-
; AVX2: # %bb.0:
323-
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
324-
; AVX2-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
325-
; AVX2-NEXT: vpsrld $1, %xmm0, %xmm1
326-
; AVX2-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
327-
; AVX2-NEXT: vpcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
328-
; AVX2-NEXT: vtestps %xmm1, %xmm0
329-
; AVX2-NEXT: sete %al
330-
; AVX2-NEXT: retq
292+
; CHECK-LABEL: pmaddwd_pcmpgt_infinite_loop:
293+
; CHECK: # %bb.0:
294+
; CHECK-NEXT: movb $1, %al
295+
; CHECK-NEXT: retq
331296
%1 = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <8 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>)
332297
%2 = icmp eq <4 x i32> %1, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
333298
%3 = select <4 x i1> %2, <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> zeroinitializer

0 commit comments

Comments
 (0)