@@ -9,8 +9,6 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
9
9
; AVX512F-LABEL: shuf2i1_1_0:
10
10
; AVX512F: # %bb.0:
11
11
; AVX512F-NEXT: vpsllq $63, %xmm0, %xmm0
12
- ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
13
- ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
14
12
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
15
13
; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
16
14
; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
@@ -21,19 +19,15 @@ define <2 x i1> @shuf2i1_1_0(<2 x i1> %a) {
21
19
; AVX512VL-LABEL: shuf2i1_1_0:
22
20
; AVX512VL: # %bb.0:
23
21
; AVX512VL-NEXT: vpsllq $63, %xmm0, %xmm0
22
+ ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
24
23
; AVX512VL-NEXT: vptestmq %xmm0, %xmm0, %k1
25
24
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
26
- ; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1} {z}
27
- ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
28
- ; AVX512VL-NEXT: vptestmq %xmm1, %xmm1, %k1
29
25
; AVX512VL-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
30
26
; AVX512VL-NEXT: retq
31
27
;
32
28
; VL_BW_DQ-LABEL: shuf2i1_1_0:
33
29
; VL_BW_DQ: # %bb.0:
34
30
; VL_BW_DQ-NEXT: vpsllq $63, %xmm0, %xmm0
35
- ; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
36
- ; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
37
31
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
38
32
; VL_BW_DQ-NEXT: vpmovq2m %xmm0, %k0
39
33
; VL_BW_DQ-NEXT: vpmovm2q %k0, %xmm0
@@ -86,10 +80,8 @@ define <2 x i1> @shuf2i1_1_2(<2 x i1> %a) {
86
80
define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
87
81
; AVX512F-LABEL: shuf4i1_3_2_10:
88
82
; AVX512F: # %bb.0:
89
- ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
90
- ; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
91
- ; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
92
83
; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
84
+ ; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
93
85
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
94
86
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
95
87
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
@@ -98,21 +90,17 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
98
90
;
99
91
; AVX512VL-LABEL: shuf4i1_3_2_10:
100
92
; AVX512VL: # %bb.0:
93
+ ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
101
94
; AVX512VL-NEXT: vpslld $31, %xmm0, %xmm0
102
95
; AVX512VL-NEXT: vptestmd %xmm0, %xmm0, %k1
103
96
; AVX512VL-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
104
- ; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1} {z}
105
- ; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,2,1,0]
106
- ; AVX512VL-NEXT: vptestmd %xmm1, %xmm1, %k1
107
97
; AVX512VL-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
108
98
; AVX512VL-NEXT: retq
109
99
;
110
100
; VL_BW_DQ-LABEL: shuf4i1_3_2_10:
111
101
; VL_BW_DQ: # %bb.0:
112
- ; VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0
113
- ; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
114
- ; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
115
102
; VL_BW_DQ-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
103
+ ; VL_BW_DQ-NEXT: vpslld $31, %xmm0, %xmm0
116
104
; VL_BW_DQ-NEXT: vpmovd2m %xmm0, %k0
117
105
; VL_BW_DQ-NEXT: vpmovm2d %k0, %xmm0
118
106
; VL_BW_DQ-NEXT: retq
@@ -123,11 +111,10 @@ define <4 x i1> @shuf4i1_3_2_10(<4 x i1> %a) {
123
111
define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %a1, <8 x i64> %b1) {
124
112
; AVX512F-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
125
113
; AVX512F: # %bb.0:
126
- ; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
127
- ; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
128
114
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
115
+ ; AVX512F-NEXT: vpermq %zmm2, %zmm1, %zmm2
129
116
; AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0
130
- ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k1
117
+ ; AVX512F-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
131
118
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
132
119
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
133
120
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
@@ -136,24 +123,22 @@ define <8 x i1> @shuf8i1_3_6_1_0_3_7_7_0(<8 x i64> %a, <8 x i64> %b, <8 x i64> %
136
123
;
137
124
; AVX512VL-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
138
125
; AVX512VL: # %bb.0:
126
+ ; AVX512VL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
127
+ ; AVX512VL-NEXT: vpermq %zmm2, %zmm1, %zmm2
128
+ ; AVX512VL-NEXT: vpermq %zmm0, %zmm1, %zmm0
139
129
; AVX512VL-NEXT: vpcmpeqq %zmm2, %zmm0, %k1
140
130
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
141
- ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
142
- ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,1,0,3,7,7,0]
143
- ; AVX512VL-NEXT: vpermd %ymm1, %ymm2, %ymm1
144
- ; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
145
131
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
146
132
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
147
133
; AVX512VL-NEXT: vzeroupper
148
134
; AVX512VL-NEXT: retq
149
135
;
150
136
; VL_BW_DQ-LABEL: shuf8i1_3_6_1_0_3_7_7_0:
151
137
; VL_BW_DQ: # %bb.0:
138
+ ; VL_BW_DQ-NEXT: vmovdqa64 {{.*#+}} zmm1 = [3,6,1,0,3,7,7,0]
139
+ ; VL_BW_DQ-NEXT: vpermq %zmm2, %zmm1, %zmm2
140
+ ; VL_BW_DQ-NEXT: vpermq %zmm0, %zmm1, %zmm0
152
141
; VL_BW_DQ-NEXT: vpcmpeqq %zmm2, %zmm0, %k0
153
- ; VL_BW_DQ-NEXT: vpmovm2d %k0, %ymm0
154
- ; VL_BW_DQ-NEXT: vmovdqa {{.*#+}} ymm1 = [3,6,1,0,3,7,7,0]
155
- ; VL_BW_DQ-NEXT: vpermd %ymm0, %ymm1, %ymm0
156
- ; VL_BW_DQ-NEXT: vpmovd2m %ymm0, %k0
157
142
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
158
143
; VL_BW_DQ-NEXT: vzeroupper
159
144
; VL_BW_DQ-NEXT: retq
0 commit comments