Skip to content

Commit fd2de54

Browse files
committed
[X86] Canonicalize vXi64 SIGN_EXTEND_INREG vXi1 to use v2Xi32 splatted shifts instead
If somehow a vXi64 bool sign_extend_inreg pattern has been lowered to vector shifts (without PSRAQ support), then try to canonicalize to vXi32 shifts to improve the likelihood of value tracking being able to fold them away. Using a PSLLQ and bitcasted PSRAD node makes it very difficult for later folds to recover from this.
1 parent 68f1391 commit fd2de54

File tree

7 files changed

+88
-81
lines changed

7 files changed

+88
-81
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+23
Original file line numberDiff line numberDiff line change
@@ -49888,6 +49888,29 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
4988849888
return Res;
4988949889
}
4989049890

49891+
// Attempt to detect an expanded vXi64 SIGN_EXTEND_INREG vXi1 pattern, and
49892+
// convert to a splatted v2Xi32 SIGN_EXTEND_INREG pattern:
49893+
// psrad(pshufd(psllq(X,63),1,1,3,3),31) ->
49894+
// pshufd(psrad(pslld(X,31),31),0,0,2,2).
49895+
if (Opcode == X86ISD::VSRAI && NumBitsPerElt == 32 && ShiftVal == 31 &&
49896+
N0.getOpcode() == X86ISD::PSHUFD &&
49897+
N0.getConstantOperandVal(1) == getV4X86ShuffleImm({1, 1, 3, 3}) &&
49898+
N0->hasOneUse()) {
49899+
SDValue BC = peekThroughOneUseBitcasts(N0.getOperand(0));
49900+
if (BC.getOpcode() == X86ISD::VSHLI &&
49901+
BC.getScalarValueSizeInBits() == 64 &&
49902+
BC.getConstantOperandVal(1) == 63) {
49903+
SDLoc DL(N);
49904+
SDValue Src = BC.getOperand(0);
49905+
Src = DAG.getBitcast(VT, Src);
49906+
Src = DAG.getNode(X86ISD::PSHUFD, DL, VT, Src,
49907+
getV4X86ShuffleImm8ForMask({0, 0, 2, 2}, DL, DAG));
49908+
Src = DAG.getNode(X86ISD::VSHLI, DL, VT, Src, N1);
49909+
Src = DAG.getNode(X86ISD::VSRAI, DL, VT, Src, N1);
49910+
return Src;
49911+
}
49912+
}
49913+
4989149914
auto TryConstantFold = [&](SDValue V) {
4989249915
APInt UndefElts;
4989349916
SmallVector<APInt, 32> EltBits;

llvm/test/CodeGen/X86/icmp-abs-C-vec.ll

+14-18
Original file line numberDiff line numberDiff line change
@@ -587,9 +587,8 @@ define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
587587
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
588588
; SSE41-NEXT: orps %xmm2, %xmm0
589589
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
590-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
591-
; SSE41-NEXT: psllq $63, %xmm0
592-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
590+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
591+
; SSE41-NEXT: pslld $31, %xmm1
593592
; SSE41-NEXT: psrad $31, %xmm1
594593
; SSE41-NEXT: movdqa %xmm2, %xmm0
595594
; SSE41-NEXT: retq
@@ -612,12 +611,11 @@ define <4 x i64> @eq_or_to_abs_vec4x64_sext(<4 x i64> %x) {
612611
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
613612
; SSE2-NEXT: andps %xmm3, %xmm0
614613
; SSE2-NEXT: orps %xmm2, %xmm0
615-
; SSE2-NEXT: pxor %xmm1, %xmm1
616-
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
617-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
618-
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
619-
; SSE2-NEXT: psllq $63, %xmm2
620-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
614+
; SSE2-NEXT: xorps %xmm2, %xmm2
615+
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
616+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
617+
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
618+
; SSE2-NEXT: pslld $31, %xmm1
621619
; SSE2-NEXT: psrad $31, %xmm1
622620
; SSE2-NEXT: retq
623621
%cmp1 = icmp eq <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
@@ -729,9 +727,8 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
729727
; SSE41-NEXT: orps %xmm2, %xmm0
730728
; SSE41-NEXT: xorps %xmm3, %xmm0
731729
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
732-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
733-
; SSE41-NEXT: psllq $63, %xmm0
734-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
730+
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
731+
; SSE41-NEXT: pslld $31, %xmm1
735732
; SSE41-NEXT: psrad $31, %xmm1
736733
; SSE41-NEXT: movdqa %xmm2, %xmm0
737734
; SSE41-NEXT: retq
@@ -756,12 +753,11 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
756753
; SSE2-NEXT: andps %xmm4, %xmm0
757754
; SSE2-NEXT: orps %xmm2, %xmm0
758755
; SSE2-NEXT: xorps %xmm3, %xmm0
759-
; SSE2-NEXT: pxor %xmm1, %xmm1
760-
; SSE2-NEXT: pcmpgtd %xmm0, %xmm1
761-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,1,3,3]
762-
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
763-
; SSE2-NEXT: psllq $63, %xmm2
764-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
756+
; SSE2-NEXT: xorps %xmm2, %xmm2
757+
; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
758+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
759+
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
760+
; SSE2-NEXT: pslld $31, %xmm1
765761
; SSE2-NEXT: psrad $31, %xmm1
766762
; SSE2-NEXT: retq
767763
%cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>

llvm/test/CodeGen/X86/promote-cmp.ll

+2-3
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,8 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
3535
; SSE2-NEXT: pand %xmm4, %xmm0
3636
; SSE2-NEXT: pandn %xmm2, %xmm4
3737
; SSE2-NEXT: por %xmm4, %xmm0
38-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,1,3,3]
39-
; SSE2-NEXT: psllq $63, %xmm2
40-
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
38+
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3]
39+
; SSE2-NEXT: pslld $31, %xmm2
4140
; SSE2-NEXT: psrad $31, %xmm2
4241
; SSE2-NEXT: pand %xmm2, %xmm1
4342
; SSE2-NEXT: pandn %xmm3, %xmm2

llvm/test/CodeGen/X86/vector-bo-select.ll

+8-8
Original file line numberDiff line numberDiff line change
@@ -5510,8 +5510,8 @@ define <8 x i64> @select_sdiv_neutral_constant_v8i64(<8 x i1> %b, <8 x i64> %x,
55105510
; SSE2: # %bb.0:
55115511
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,2,2,2]
55125512
; SSE2-NEXT: pshufhw {{.*#+}} xmm8 = xmm8[0,1,2,3,5,5,5,5]
5513-
; SSE2-NEXT: psllq $63, %xmm8
5514-
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm8[1,1,3,3]
5513+
; SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm8[0,0,2,2]
5514+
; SSE2-NEXT: pslld $31, %xmm8
55155515
; SSE2-NEXT: psrad $31, %xmm8
55165516
; SSE2-NEXT: movdqa %xmm8, %xmm10
55175517
; SSE2-NEXT: pandn %xmm7, %xmm10
@@ -5520,26 +5520,26 @@ define <8 x i64> @select_sdiv_neutral_constant_v8i64(<8 x i1> %b, <8 x i64> %x,
55205520
; SSE2-NEXT: por %xmm10, %xmm8
55215521
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
55225522
; SSE2-NEXT: pshufhw {{.*#+}} xmm7 = xmm7[0,1,2,3,5,5,5,5]
5523-
; SSE2-NEXT: psllq $63, %xmm7
5524-
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
5523+
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,0,2,2]
5524+
; SSE2-NEXT: pslld $31, %xmm7
55255525
; SSE2-NEXT: psrad $31, %xmm7
55265526
; SSE2-NEXT: movdqa %xmm7, %xmm10
55275527
; SSE2-NEXT: pandn %xmm6, %xmm10
55285528
; SSE2-NEXT: pand %xmm9, %xmm7
55295529
; SSE2-NEXT: por %xmm10, %xmm7
55305530
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[0,0,0,0]
55315531
; SSE2-NEXT: pshufhw {{.*#+}} xmm6 = xmm6[0,1,2,3,5,5,5,5]
5532-
; SSE2-NEXT: psllq $63, %xmm6
5533-
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
5532+
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,2,2]
5533+
; SSE2-NEXT: pslld $31, %xmm6
55345534
; SSE2-NEXT: psrad $31, %xmm6
55355535
; SSE2-NEXT: movdqa %xmm6, %xmm10
55365536
; SSE2-NEXT: pandn %xmm5, %xmm10
55375537
; SSE2-NEXT: pand %xmm9, %xmm6
55385538
; SSE2-NEXT: por %xmm10, %xmm6
55395539
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
55405540
; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
5541-
; SSE2-NEXT: psllq $63, %xmm0
5542-
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
5541+
; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[0,0,2,2]
5542+
; SSE2-NEXT: pslld $31, %xmm5
55435543
; SSE2-NEXT: psrad $31, %xmm5
55445544
; SSE2-NEXT: pand %xmm5, %xmm9
55455545
; SSE2-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5

llvm/test/CodeGen/X86/vector-sext.ll

+18-28
Original file line numberDiff line numberDiff line change
@@ -1775,13 +1775,11 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
17751775
; SSE2-NEXT: shrb $3, %al
17761776
; SSE2-NEXT: movzbl %al, %eax
17771777
; SSE2-NEXT: pinsrw $6, %eax, %xmm1
1778-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
1779-
; SSE2-NEXT: psllq $63, %xmm0
1780-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1778+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
1779+
; SSE2-NEXT: pslld $31, %xmm0
17811780
; SSE2-NEXT: psrad $31, %xmm0
1782-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
1783-
; SSE2-NEXT: psllq $63, %xmm1
1784-
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1781+
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1782+
; SSE2-NEXT: pslld $31, %xmm1
17851783
; SSE2-NEXT: psrad $31, %xmm1
17861784
; SSE2-NEXT: retq
17871785
;
@@ -1805,13 +1803,11 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
18051803
; SSSE3-NEXT: shrb $3, %al
18061804
; SSSE3-NEXT: movzbl %al, %eax
18071805
; SSSE3-NEXT: pinsrw $6, %eax, %xmm1
1808-
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
1809-
; SSSE3-NEXT: psllq $63, %xmm0
1810-
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1806+
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
1807+
; SSSE3-NEXT: pslld $31, %xmm0
18111808
; SSSE3-NEXT: psrad $31, %xmm0
1812-
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
1813-
; SSSE3-NEXT: psllq $63, %xmm1
1814-
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1809+
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1810+
; SSSE3-NEXT: pslld $31, %xmm1
18151811
; SSSE3-NEXT: psrad $31, %xmm1
18161812
; SSSE3-NEXT: retq
18171813
;
@@ -1831,17 +1827,15 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
18311827
; SSE41-NEXT: shrb $2, %cl
18321828
; SSE41-NEXT: andb $1, %cl
18331829
; SSE41-NEXT: movzbl %cl, %ecx
1834-
; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
18351830
; SSE41-NEXT: pinsrb $8, %ecx, %xmm1
18361831
; SSE41-NEXT: shrb $3, %al
18371832
; SSE41-NEXT: movzbl %al, %eax
1833+
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
18381834
; SSE41-NEXT: pinsrb $12, %eax, %xmm1
1839-
; SSE41-NEXT: psllq $63, %xmm0
1840-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1835+
; SSE41-NEXT: pslld $31, %xmm0
18411836
; SSE41-NEXT: psrad $31, %xmm0
18421837
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1843-
; SSE41-NEXT: psllq $63, %xmm1
1844-
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1838+
; SSE41-NEXT: pslld $31, %xmm1
18451839
; SSE41-NEXT: psrad $31, %xmm1
18461840
; SSE41-NEXT: retq
18471841
;
@@ -1939,13 +1933,11 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
19391933
; X86-SSE2-NEXT: shrb $3, %al
19401934
; X86-SSE2-NEXT: movzbl %al, %eax
19411935
; X86-SSE2-NEXT: pinsrw $6, %eax, %xmm1
1942-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3]
1943-
; X86-SSE2-NEXT: psllq $63, %xmm0
1944-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1936+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
1937+
; X86-SSE2-NEXT: pslld $31, %xmm0
19451938
; X86-SSE2-NEXT: psrad $31, %xmm0
1946-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3]
1947-
; X86-SSE2-NEXT: psllq $63, %xmm1
1948-
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1939+
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1940+
; X86-SSE2-NEXT: pslld $31, %xmm1
19491941
; X86-SSE2-NEXT: psrad $31, %xmm1
19501942
; X86-SSE2-NEXT: retl
19511943
;
@@ -1966,17 +1958,15 @@ define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) {
19661958
; X86-SSE41-NEXT: shrb $2, %cl
19671959
; X86-SSE41-NEXT: andb $1, %cl
19681960
; X86-SSE41-NEXT: movzbl %cl, %ecx
1969-
; X86-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero
19701961
; X86-SSE41-NEXT: pinsrb $8, %ecx, %xmm1
19711962
; X86-SSE41-NEXT: shrb $3, %al
19721963
; X86-SSE41-NEXT: movzbl %al, %eax
1964+
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,0,1,1]
19731965
; X86-SSE41-NEXT: pinsrb $12, %eax, %xmm1
1974-
; X86-SSE41-NEXT: psllq $63, %xmm0
1975-
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
1966+
; X86-SSE41-NEXT: pslld $31, %xmm0
19761967
; X86-SSE41-NEXT: psrad $31, %xmm0
19771968
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1978-
; X86-SSE41-NEXT: psllq $63, %xmm1
1979-
; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1969+
; X86-SSE41-NEXT: pslld $31, %xmm1
19801970
; X86-SSE41-NEXT: psrad $31, %xmm1
19811971
; X86-SSE41-NEXT: retl
19821972
entry:

llvm/test/CodeGen/X86/vselect.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -483,8 +483,8 @@ define <16 x double> @select_illegal(<16 x double> %a, <16 x double> %b) {
483483
define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
484484
; SSE2-LABEL: shrunkblend_2uses:
485485
; SSE2: # %bb.0:
486-
; SSE2-NEXT: psllq $63, %xmm0
487-
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
486+
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
487+
; SSE2-NEXT: pslld $31, %xmm0
488488
; SSE2-NEXT: psrad $31, %xmm0
489489
; SSE2-NEXT: movdqa %xmm0, %xmm5
490490
; SSE2-NEXT: pandn %xmm2, %xmm5
@@ -522,8 +522,8 @@ define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b,
522522
define <2 x i64> @shrunkblend_nonvselectuse(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
523523
; SSE2-LABEL: shrunkblend_nonvselectuse:
524524
; SSE2: # %bb.0:
525-
; SSE2-NEXT: psllq $63, %xmm0
526-
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
525+
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,0,2,2]
526+
; SSE2-NEXT: pslld $31, %xmm3
527527
; SSE2-NEXT: psrad $31, %xmm3
528528
; SSE2-NEXT: movdqa %xmm3, %xmm0
529529
; SSE2-NEXT: pandn %xmm2, %xmm0

llvm/test/CodeGen/X86/vsplit-and.ll

+19-20
Original file line numberDiff line numberDiff line change
@@ -23,31 +23,30 @@ define void @t0(ptr %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
2323
define void @t2(ptr %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
2424
; CHECK-LABEL: t2:
2525
; CHECK: # %bb.0:
26-
; CHECK-NEXT: movq %r9, %xmm1
27-
; CHECK-NEXT: movq %r8, %xmm0
28-
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
29-
; CHECK-NEXT: movq %rdx, %xmm1
26+
; CHECK-NEXT: movq %r9, %xmm0
27+
; CHECK-NEXT: movq %r8, %xmm1
28+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
29+
; CHECK-NEXT: movq %rdx, %xmm0
3030
; CHECK-NEXT: movq %rsi, %xmm2
31-
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0]
32-
; CHECK-NEXT: movq %rcx, %xmm1
31+
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
32+
; CHECK-NEXT: movq %rcx, %xmm0
3333
; CHECK-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
3434
; CHECK-NEXT: pxor %xmm4, %xmm4
35-
; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
35+
; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
3636
; CHECK-NEXT: pcmpeqq %xmm4, %xmm2
37-
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
38-
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
37+
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2]
38+
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
3939
; CHECK-NEXT: pcmpeqq %xmm4, %xmm3
40-
; CHECK-NEXT: pcmpeqq %xmm4, %xmm0
41-
; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
42-
; CHECK-NEXT: orps %xmm2, %xmm0
43-
; CHECK-NEXT: xorps %xmm1, %xmm0
44-
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
45-
; CHECK-NEXT: psllq $63, %xmm1
46-
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
47-
; CHECK-NEXT: psrad $31, %xmm1
48-
; CHECK-NEXT: pmovsxdq %xmm0, %xmm0
49-
; CHECK-NEXT: movdqa %xmm0, (%rdi)
50-
; CHECK-NEXT: movq %xmm1, 16(%rdi)
40+
; CHECK-NEXT: pcmpeqq %xmm4, %xmm1
41+
; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm3[0,2]
42+
; CHECK-NEXT: orps %xmm2, %xmm1
43+
; CHECK-NEXT: xorps %xmm0, %xmm1
44+
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,2,2]
45+
; CHECK-NEXT: pslld $31, %xmm0
46+
; CHECK-NEXT: psrad $31, %xmm0
47+
; CHECK-NEXT: pmovsxdq %xmm1, %xmm1
48+
; CHECK-NEXT: movdqa %xmm1, (%rdi)
49+
; CHECK-NEXT: movq %xmm0, 16(%rdi)
5150
; CHECK-NEXT: retq
5251
%cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
5352
%cmp2 = icmp ne <3 x i64> %src2, zeroinitializer

0 commit comments

Comments
 (0)