Commit 5b5dc24

[X86][AVX] getTargetShuffleMask - don't decode VBROADCAST(EXTRACT_SUBVECTOR(X,0)) patterns.
getTargetShuffleMask is used by the various "SimplifyDemanded" folds, so we can't assume that the bypassed extract_subvector can be safely simplified. getFauxShuffleMask performs a more general decode that lets us catch many of these cases more safely, so the impact is minimal.
1 parent 9e78371 commit 5b5dc24

8 files changed (+244 -259 lines)
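As a rough, hypothetical illustration (not taken from this commit's tests), IR of the following shape can lower on AVX2 targets to a broadcast whose source is the low subvector of a wider value, i.e. the VBROADCAST(EXTRACT_SUBVECTOR(X,0)) pattern that getTargetShuffleMask now declines to peek through and instead leaves to getFauxShuffleMask:

; Hypothetical example: splat lane 0 of the low 128-bit half of a 256-bit vector.
define <8 x i32> @splat_from_low_half(<8 x i32> %v) {
  %lo = shufflevector <8 x i32> %v, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %splat = shufflevector <4 x i32> %lo, <4 x i32> undef, <8 x i32> zeroinitializer
  ret <8 x i32> %splat
}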

llvm/lib/Target/X86/X86ISelLowering.cpp (+5 -14)
@@ -6916,25 +6916,16 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
     DecodeZeroMoveLowMask(NumElems, Mask);
     IsUnary = true;
     break;
-  case X86ISD::VBROADCAST: {
-    SDValue N0 = N->getOperand(0);
-    // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
-    // add the pre-extracted value to the Ops vector.
-    if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
-        N0.getOperand(0).getValueType() == VT &&
-        N0.getConstantOperandVal(1) == 0)
-      Ops.push_back(N0.getOperand(0));
-
-    // We only decode broadcasts of same-sized vectors, unless the broadcast
-    // came from an extract from the original width. If we found one, we
-    // pushed it the Ops vector above.
-    if (N0.getValueType() == VT || !Ops.empty()) {
+  case X86ISD::VBROADCAST:
+    // We only decode broadcasts of same-sized vectors, peeking through to
+    // extracted subvectors is likely to cause hasOneUse issues with
+    // SimplifyDemandedBits etc.
+    if (N->getOperand(0).getValueType() == VT) {
       DecodeVectorBroadcast(NumElems, Mask);
       IsUnary = true;
       break;
     }
     return false;
-  }
   case X86ISD::VPERMILPV: {
     assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
     IsUnary = true;

llvm/test/CodeGen/X86/vector-fshl-256.ll (+19 -19)
@@ -1092,12 +1092,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
 ; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
 ; AVX2-NEXT: vpslld %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
 ; AVX2-NEXT: vpsubd %xmm2, %xmm4, %xmm4
 ; AVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
 ; AVX2-NEXT: vpsrld %xmm4, %ymm1, %ymm1
 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
@@ -1110,12 +1110,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
 ; AVX512F-NEXT: vpand %xmm3, %xmm2, %xmm2
 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
 ; AVX512F-NEXT: vpslld %xmm3, %ymm0, %ymm3
+; AVX512F-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
 ; AVX512F-NEXT: vpsubd %xmm2, %xmm4, %xmm4
 ; AVX512F-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
 ; AVX512F-NEXT: vpsrld %xmm4, %ymm1, %ymm1
 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512F-NEXT: vptestnmd %zmm2, %zmm2, %k1
 ; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
 ; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
@@ -1126,12 +1126,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
 ; AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2
 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
 ; AVX512VL-NEXT: vpslld %xmm3, %ymm0, %ymm3
+; AVX512VL-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
 ; AVX512VL-NEXT: vpsubd %xmm2, %xmm4, %xmm4
 ; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
 ; AVX512VL-NEXT: vpsrld %xmm4, %ymm1, %ymm1
 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VL-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512VL-NEXT: vptestnmd %ymm2, %ymm2, %k1
 ; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
 ; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
@@ -1144,12 +1144,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
 ; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2
 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
 ; AVX512BW-NEXT: vpslld %xmm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
 ; AVX512BW-NEXT: vpsubd %xmm2, %xmm4, %xmm4
 ; AVX512BW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
 ; AVX512BW-NEXT: vpsrld %xmm4, %ymm1, %ymm1
 ; AVX512BW-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512BW-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512BW-NEXT: vptestnmd %zmm2, %zmm2, %k1
 ; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
 ; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
@@ -1162,12 +1162,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
 ; AVX512VBMI2-NEXT: vpand %xmm3, %xmm2, %xmm2
 ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
 ; AVX512VBMI2-NEXT: vpslld %xmm3, %ymm0, %ymm3
+; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512VBMI2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
 ; AVX512VBMI2-NEXT: vpsubd %xmm2, %xmm4, %xmm4
 ; AVX512VBMI2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
 ; AVX512VBMI2-NEXT: vpsrld %xmm4, %ymm1, %ymm1
 ; AVX512VBMI2-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VBMI2-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512VBMI2-NEXT: vptestnmd %zmm2, %zmm2, %k1
 ; AVX512VBMI2-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
 ; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
@@ -1178,12 +1178,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
 ; AVX512VLBW-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm2, %xmm2
 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
 ; AVX512VLBW-NEXT: vpslld %xmm3, %ymm0, %ymm3
+; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512VLBW-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
 ; AVX512VLBW-NEXT: vpsubd %xmm2, %xmm4, %xmm4
 ; AVX512VLBW-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
 ; AVX512VLBW-NEXT: vpsrld %xmm4, %ymm1, %ymm1
 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VLBW-NEXT: vpbroadcastd %xmm2, %ymm2
 ; AVX512VLBW-NEXT: vptestnmd %ymm2, %ymm2, %k1
 ; AVX512VLBW-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
 ; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
@@ -1224,12 +1224,12 @@ define <8 x i32> @splatvar_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y, <8 x i32> %
 ; XOPAVX2-NEXT: vpand %xmm3, %xmm2, %xmm2
 ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
 ; XOPAVX2-NEXT: vpslld %xmm3, %ymm0, %ymm3
+; XOPAVX2-NEXT: vpbroadcastd %xmm2, %ymm2
 ; XOPAVX2-NEXT: vpbroadcastd {{.*#+}} xmm4 = [32,32,32,32]
 ; XOPAVX2-NEXT: vpsubd %xmm2, %xmm4, %xmm4
 ; XOPAVX2-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
 ; XOPAVX2-NEXT: vpsrld %xmm4, %ymm1, %ymm1
 ; XOPAVX2-NEXT: vpor %ymm1, %ymm3, %ymm1
-; XOPAVX2-NEXT: vpbroadcastd %xmm2, %ymm2
 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; XOPAVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm2
 ; XOPAVX2-NEXT: vblendvps %ymm2, %ymm0, %ymm1, %ymm0
@@ -1271,12 +1271,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm3
+; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
 ; AVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm4
 ; AVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
 ; AVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
@@ -1287,12 +1287,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
 ; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512F-NEXT: vpsllw %xmm3, %ymm0, %ymm3
+; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
 ; AVX512F-NEXT: vpsubw %xmm2, %xmm4, %xmm4
 ; AVX512F-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
 ; AVX512F-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
@@ -1303,12 +1303,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3
+; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
 ; AVX512VL-NEXT: vpsubw %xmm2, %xmm4, %xmm4
 ; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
 ; AVX512VL-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
 ; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VL-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX512VL-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
@@ -1320,12 +1320,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
 ; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512BW-NEXT: vpsllw %xmm3, %ymm0, %ymm3
+; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
 ; AVX512BW-NEXT: vpsubw %xmm2, %xmm4, %xmm4
 ; AVX512BW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
 ; AVX512BW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
 ; AVX512BW-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512BW-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512BW-NEXT: vptestnmw %zmm2, %zmm2, %k1
 ; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
 ; AVX512BW-NEXT: vmovdqa %ymm1, %ymm0
@@ -1337,12 +1337,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
 ; AVX512VBMI2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512VBMI2-NEXT: vpsllw %xmm3, %ymm0, %ymm3
+; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
 ; AVX512VBMI2-NEXT: vpsubw %xmm2, %xmm4, %xmm4
 ; AVX512VBMI2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
 ; AVX512VBMI2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
 ; AVX512VBMI2-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VBMI2-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512VBMI2-NEXT: vptestnmw %zmm2, %zmm2, %k1
 ; AVX512VBMI2-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
 ; AVX512VBMI2-NEXT: vmovdqa %ymm1, %ymm0
@@ -1353,12 +1353,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
 ; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; AVX512VLBW-NEXT: vpsllw %xmm3, %ymm0, %ymm3
+; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
 ; AVX512VLBW-NEXT: vpsubw %xmm2, %xmm4, %xmm4
 ; AVX512VLBW-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
 ; AVX512VLBW-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
 ; AVX512VLBW-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512VLBW-NEXT: vpbroadcastw %xmm2, %ymm2
 ; AVX512VLBW-NEXT: vptestnmw %ymm2, %ymm2, %k1
 ; AVX512VLBW-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
 ; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
@@ -1399,12 +1399,12 @@ define <16 x i16> @splatvar_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i
 ; XOPAVX2-NEXT: vpand {{.*}}(%rip), %xmm2, %xmm2
 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
 ; XOPAVX2-NEXT: vpsllw %xmm3, %ymm0, %ymm3
+; XOPAVX2-NEXT: vpbroadcastw %xmm2, %ymm2
 ; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [16,16,16,16,16,16,16,16]
 ; XOPAVX2-NEXT: vpsubw %xmm2, %xmm4, %xmm4
 ; XOPAVX2-NEXT: vpmovzxwq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero
 ; XOPAVX2-NEXT: vpsrlw %xmm4, %ymm1, %ymm1
 ; XOPAVX2-NEXT: vpor %ymm1, %ymm3, %ymm1
-; XOPAVX2-NEXT: vpbroadcastw %xmm2, %ymm2
 ; XOPAVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; XOPAVX2-NEXT: vpcmpeqw %ymm3, %ymm2, %ymm2
 ; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
@@ -1458,6 +1458,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
 ; AVX2-NEXT: vpsllw %xmm3, %xmm5, %xmm3
 ; AVX2-NEXT: vpbroadcastb %xmm3, %ymm3
 ; AVX2-NEXT: vpand %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX2-NEXT: vpsubb %xmm2, %xmm4, %xmm4
 ; AVX2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
@@ -1467,7 +1468,6 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
 ; AVX2-NEXT: vpbroadcastb %xmm4, %ymm4
 ; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
 ; AVX2-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX2-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
 ; AVX2-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
@@ -1482,6 +1482,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
 ; AVX512F-NEXT: vpsllw %xmm3, %xmm5, %xmm3
 ; AVX512F-NEXT: vpbroadcastb %xmm3, %ymm3
 ; AVX512F-NEXT: vpand %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512F-NEXT: vpsubb %xmm2, %xmm4, %xmm4
 ; AVX512F-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
@@ -1491,7 +1492,6 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
 ; AVX512F-NEXT: vpbroadcastb %xmm4, %ymm4
 ; AVX512F-NEXT: vpand %ymm4, %ymm1, %ymm1
 ; AVX512F-NEXT: vpor %ymm1, %ymm3, %ymm1
-; AVX512F-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX512F-NEXT: vpxor %xmm3, %xmm3, %xmm3
 ; AVX512F-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
 ; AVX512F-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
@@ -1506,6 +1506,7 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
 ; AVX512VL-NEXT: vpsllw %xmm3, %xmm5, %xmm3
 ; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3
 ; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
 ; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4
 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero
@@ -1514,9 +1515,8 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
 ; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
 ; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
 ; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm3, %ymm4
-; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm1
-; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm0, %ymm4, %ymm0
 ; AVX512VL-NEXT: retq
 ;
