Skip to content

Commit dcfcfdb

Browse files
committed
[X86] Converge X86ISD::VPERMV3 and X86ISD::VPERMIV3 to a single opcode.
These do the same thing with the first and second sources swapped. They previously came from separate intrinsics that specified different masking behavior, but we can cover that with isel patterns and a single node. This is a step towards reducing the number of intrinsics needed. A bunch of tests change because we are now biased towards choosing VPERMT over VPERMI when there is nothing to signal that commuting is beneficial. llvm-svn: 333383
1 parent 6b54518 commit dcfcfdb

11 files changed

+109
-118
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4372,7 +4372,6 @@ static bool isTargetShuffle(unsigned Opcode) {
43724372
case X86ISD::VPPERM:
43734373
case X86ISD::VPERMV:
43744374
case X86ISD::VPERMV3:
4375-
case X86ISD::VPERMIV3:
43764375
case X86ISD::VZEXT_MOVL:
43774376
return true;
43784377
}
@@ -4388,7 +4387,6 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
43884387
case X86ISD::VPPERM:
43894388
case X86ISD::VPERMV:
43904389
case X86ISD::VPERMV3:
4391-
case X86ISD::VPERMIV3:
43924390
return true;
43934391
// 'Faux' Target Shuffles.
43944392
case ISD::AND:
@@ -5977,21 +5975,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
59775975
}
59785976
return false;
59795977
}
5980-
case X86ISD::VPERMIV3: {
5981-
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
5982-
assert(N->getOperand(2).getValueType() == VT && "Unexpected value type");
5983-
IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
5984-
// Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
5985-
Ops.push_back(N->getOperand(1));
5986-
Ops.push_back(N->getOperand(2));
5987-
SDValue MaskNode = N->getOperand(0);
5988-
unsigned MaskEltSize = VT.getScalarSizeInBits();
5989-
if (auto *C = getTargetConstantFromNode(MaskNode)) {
5990-
DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5991-
break;
5992-
}
5993-
return false;
5994-
}
59955978
default: llvm_unreachable("unknown target shuffle node");
59965979
}
59975980

@@ -20540,9 +20523,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2054020523
SDValue Src3 = Op.getOperand(3);
2054120524
SDValue Mask = Op.getOperand(4);
2054220525
MVT VT = Op.getSimpleValueType();
20543-
SDValue PassThru = SDValue();
2054420526

2054520527
// set PassThru element
20528+
SDValue PassThru;
2054620529
if (IntrData->Type == VPERM_3OP_MASKZ)
2054720530
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
2054820531
else
@@ -20554,6 +20537,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2055420537
Src2, Src1, Src3),
2055520538
Mask, PassThru, Subtarget, DAG);
2055620539
}
20540+
case VPERMI_3OP_MASK:{
20541+
// Src2 is the PassThru
20542+
SDValue Src1 = Op.getOperand(1);
20543+
SDValue Src2 = Op.getOperand(2);
20544+
SDValue Src3 = Op.getOperand(3);
20545+
SDValue Mask = Op.getOperand(4);
20546+
MVT VT = Op.getSimpleValueType();
20547+
20548+
// set PassThru element
20549+
SDValue PassThru = DAG.getBitcast(VT, Src2);
20550+
20551+
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
20552+
dl, Op.getValueType(),
20553+
Src1, Src2, Src3),
20554+
Mask, PassThru, Subtarget, DAG);
20555+
}
2055720556
case FMA_OP_MASK3:
2055820557
case FMA_OP_MASKZ:
2055920558
case FMA_OP_MASK: {
@@ -25873,7 +25872,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
2587325872
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
2587425873
case X86ISD::VPERMV: return "X86ISD::VPERMV";
2587525874
case X86ISD::VPERMV3: return "X86ISD::VPERMV3";
25876-
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
2587725875
case X86ISD::VPERMI: return "X86ISD::VPERMI";
2587825876
case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG";
2587925877
case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM";
@@ -38861,7 +38859,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
3886138859
case X86ISD::VPERMI:
3886238860
case X86ISD::VPERMV:
3886338861
case X86ISD::VPERMV3:
38864-
case X86ISD::VPERMIV3:
3886538862
case X86ISD::VPERMIL2:
3886638863
case X86ISD::VPERMILPI:
3886738864
case X86ISD::VPERMILPV:

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -428,10 +428,6 @@ namespace llvm {
428428
// Res = VPERMV3 V0, MaskV, V1
429429
VPERMV3,
430430

431-
// 3-op Variable Permute overwriting the index (VPERMI2).
432-
// Res = VPERMIV3 V0, MaskV, V1
433-
VPERMIV3,
434-
435431
// Bitwise ternary logic.
436432
VPTERNLOG,
437433
// Fix Up Special Packed Float32/64 values.

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
334334
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
335335
// operand differs from the output VT. This requires a bitconvert on
336336
// the preserved vector going into the vselect.
337+
// NOTE: The unmasked pattern is disabled.
337338
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
338339
X86VectorVTInfo InVT,
339340
dag Outs, dag NonTiedIns, string OpcodeStr,
@@ -343,7 +344,7 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
343344
!con((ins InVT.RC:$src1), NonTiedIns),
344345
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
345346
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
346-
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
347+
OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag),
347348
(vselect InVT.KRCWM:$mask, RHS,
348349
(bitconvert InVT.RC:$src1)),
349350
vselect, "", IsCommutable>;
@@ -1719,17 +1720,19 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
17191720
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
17201721
X86FoldableSchedWrite sched,
17211722
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1722-
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1723+
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1724+
hasSideEffects = 0 in {
17231725
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
17241726
(ins _.RC:$src2, _.RC:$src3),
17251727
OpcodeStr, "$src3, $src2", "$src2, $src3",
1726-
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>,
1728+
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>,
17271729
EVEX_4V, AVX5128IBase, Sched<[sched]>;
17281730

1731+
let mayLoad = 1 in
17291732
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
17301733
(ins _.RC:$src2, _.MemOp:$src3),
17311734
OpcodeStr, "$src3, $src2", "$src2, $src3",
1732-
(_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
1735+
(_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1,
17331736
(_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
17341737
EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
17351738
}
@@ -1738,13 +1741,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
17381741
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
17391742
X86FoldableSchedWrite sched,
17401743
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1741-
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1744+
let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1745+
hasSideEffects = 0, mayLoad = 1 in
17421746
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
17431747
(ins _.RC:$src2, _.ScalarMemOp:$src3),
17441748
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
17451749
!strconcat("$src2, ${src3}", _.BroadcastStr ),
1746-
(_.VT (X86VPermi2X IdxVT.RC:$src1,
1747-
_.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1750+
(_.VT (X86VPermt2 _.RC:$src2,
1751+
IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
17481752
AVX5128IBase, EVEX_4V, EVEX_B,
17491753
Sched<[sched.Folded, ReadAfterLd]>;
17501754
}
@@ -1806,21 +1810,22 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
18061810
X86VectorVTInfo IdxVT,
18071811
X86VectorVTInfo CastVT> {
18081812
def : Pat<(_.VT (vselect _.KRCWM:$mask,
1809-
(X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1810-
(_.VT _.RC:$src2), _.RC:$src3),
1813+
(X86VPermt2 (_.VT _.RC:$src2),
1814+
(IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3),
18111815
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
18121816
(!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
18131817
_.RC:$src2, _.RC:$src3)>;
18141818
def : Pat<(_.VT (vselect _.KRCWM:$mask,
1815-
(X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1816-
_.RC:$src2, (_.LdFrag addr:$src3)),
1819+
(X86VPermt2 _.RC:$src2,
1820+
(IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1821+
(_.LdFrag addr:$src3)),
18171822
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
18181823
(!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
18191824
_.RC:$src2, addr:$src3)>;
18201825
def : Pat<(_.VT (vselect _.KRCWM:$mask,
1821-
(X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1822-
_.RC:$src2,
1823-
(X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1826+
(X86VPermt2 _.RC:$src2,
1827+
(IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1828+
(X86VBroadcast (_.ScalarLdFrag addr:$src3))),
18241829
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
18251830
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
18261831
_.RC:$src2, addr:$src3)>;

llvm/lib/Target/X86/X86InstrFragmentsSIMD.td

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -417,13 +417,6 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
417417
SDTCisSameSizeAs<0,2>,
418418
SDTCisSameAs<0,3>]>, []>;
419419

420-
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
421-
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
422-
SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
423-
SDTCisSameSizeAs<0,1>,
424-
SDTCisSameAs<0,2>,
425-
SDTCisSameAs<0,3>]>, []>;
426-
427420
def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
428421

429422
def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;

llvm/lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 37 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ enum IntrinsicType : uint16_t {
3131
FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3,
3232
FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3,
3333
IFMA_OP,
34-
VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
34+
VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK,
3535
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK,
3636
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
3737
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
@@ -1061,42 +1061,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
10611061
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
10621062
X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
10631063

1064-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
1065-
X86ISD::VPERMIV3, 0),
1066-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
1067-
X86ISD::VPERMIV3, 0),
1068-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK,
1069-
X86ISD::VPERMIV3, 0),
1070-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK,
1071-
X86ISD::VPERMIV3, 0),
1072-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK,
1073-
X86ISD::VPERMIV3, 0),
1074-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK,
1075-
X86ISD::VPERMIV3, 0),
1076-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK,
1077-
X86ISD::VPERMIV3, 0),
1078-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK,
1079-
X86ISD::VPERMIV3, 0),
1080-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK,
1081-
X86ISD::VPERMIV3, 0),
1082-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK,
1083-
X86ISD::VPERMIV3, 0),
1084-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK,
1085-
X86ISD::VPERMIV3, 0),
1086-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK,
1087-
X86ISD::VPERMIV3, 0),
1088-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK,
1089-
X86ISD::VPERMIV3, 0),
1090-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK,
1091-
X86ISD::VPERMIV3, 0),
1092-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
1093-
X86ISD::VPERMIV3, 0),
1094-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERM_3OP_MASK,
1095-
X86ISD::VPERMIV3, 0),
1096-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERM_3OP_MASK,
1097-
X86ISD::VPERMIV3, 0),
1098-
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK,
1099-
X86ISD::VPERMIV3, 0),
1064+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK,
1065+
X86ISD::VPERMV3, 0),
1066+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK,
1067+
X86ISD::VPERMV3, 0),
1068+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK,
1069+
X86ISD::VPERMV3, 0),
1070+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK,
1071+
X86ISD::VPERMV3, 0),
1072+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK,
1073+
X86ISD::VPERMV3, 0),
1074+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK,
1075+
X86ISD::VPERMV3, 0),
1076+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK,
1077+
X86ISD::VPERMV3, 0),
1078+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK,
1079+
X86ISD::VPERMV3, 0),
1080+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK,
1081+
X86ISD::VPERMV3, 0),
1082+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK,
1083+
X86ISD::VPERMV3, 0),
1084+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK,
1085+
X86ISD::VPERMV3, 0),
1086+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK,
1087+
X86ISD::VPERMV3, 0),
1088+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK,
1089+
X86ISD::VPERMV3, 0),
1090+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK,
1091+
X86ISD::VPERMV3, 0),
1092+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK,
1093+
X86ISD::VPERMV3, 0),
1094+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK,
1095+
X86ISD::VPERMV3, 0),
1096+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK,
1097+
X86ISD::VPERMV3, 0),
1098+
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK,
1099+
X86ISD::VPERMV3, 0),
11001100
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
11011101
X86ISD::VPERMV3, 0),
11021102
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,

llvm/test/CodeGen/X86/avx512-intrinsics.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1963,8 +1963,8 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
19631963
; CHECK-NEXT: kmovw %esi, %k1
19641964
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
19651965
; CHECK-NEXT: vpermi2d (%rdi), %zmm0, %zmm3 {%k1}
1966-
; CHECK-NEXT: vpermi2d %zmm2, %zmm0, %zmm1
1967-
; CHECK-NEXT: vpaddd %zmm1, %zmm3, %zmm0
1966+
; CHECK-NEXT: vpermt2d %zmm2, %zmm1, %zmm0
1967+
; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
19681968
; CHECK-NEXT: retq
19691969
%x2 = load <16 x i32>, <16 x i32>* %x2p
19701970
%res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
@@ -1979,8 +1979,8 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0,
19791979
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
19801980
; CHECK: ## %bb.0:
19811981
; CHECK-NEXT: kmovw %edi, %k1
1982-
; CHECK-NEXT: vmovapd %zmm1, %zmm3
1983-
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm3
1982+
; CHECK-NEXT: vmovapd %zmm0, %zmm3
1983+
; CHECK-NEXT: vpermt2pd %zmm2, %zmm1, %zmm3
19841984
; CHECK-NEXT: vpermi2pd %zmm2, %zmm0, %zmm1 {%k1}
19851985
; CHECK-NEXT: vaddpd %zmm3, %zmm1, %zmm0
19861986
; CHECK-NEXT: retq
@@ -1996,8 +1996,8 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0,
19961996
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
19971997
; CHECK: ## %bb.0:
19981998
; CHECK-NEXT: kmovw %edi, %k1
1999-
; CHECK-NEXT: vmovaps %zmm1, %zmm3
2000-
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm3
1999+
; CHECK-NEXT: vmovaps %zmm0, %zmm3
2000+
; CHECK-NEXT: vpermt2ps %zmm2, %zmm1, %zmm3
20012001
; CHECK-NEXT: vpermi2ps %zmm2, %zmm0, %zmm1 {%k1}
20022002
; CHECK-NEXT: vaddps %zmm3, %zmm1, %zmm0
20032003
; CHECK-NEXT: retq
@@ -2013,8 +2013,8 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i
20132013
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
20142014
; CHECK: ## %bb.0:
20152015
; CHECK-NEXT: kmovw %edi, %k1
2016-
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm3
2017-
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm3
2016+
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm3
2017+
; CHECK-NEXT: vpermt2q %zmm2, %zmm1, %zmm3
20182018
; CHECK-NEXT: vpermi2q %zmm2, %zmm0, %zmm1 {%k1}
20192019
; CHECK-NEXT: vpaddq %zmm3, %zmm1, %zmm0
20202020
; CHECK-NEXT: retq

llvm/test/CodeGen/X86/avx512bw-intrinsics.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,17 +1055,17 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
10551055
; AVX512BW-NEXT: kmovd %edi, %k1
10561056
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm3
10571057
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1058-
; AVX512BW-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1059-
; AVX512BW-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1058+
; AVX512BW-NEXT: vpermt2w %zmm2, %zmm1, %zmm0
1059+
; AVX512BW-NEXT: vpaddw %zmm0, %zmm3, %zmm0
10601060
; AVX512BW-NEXT: retq
10611061
;
10621062
; AVX512F-32-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
10631063
; AVX512F-32: # %bb.0:
10641064
; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
10651065
; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm3
10661066
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm3 {%k1}
1067-
; AVX512F-32-NEXT: vpermi2w %zmm2, %zmm0, %zmm1
1068-
; AVX512F-32-NEXT: vpaddw %zmm1, %zmm3, %zmm0
1067+
; AVX512F-32-NEXT: vpermt2w %zmm2, %zmm1, %zmm0
1068+
; AVX512F-32-NEXT: vpaddw %zmm0, %zmm3, %zmm0
10691069
; AVX512F-32-NEXT: retl
10701070
%res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
10711071
%res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)

llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,8 +1872,8 @@ define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x
18721872
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
18731873
; CHECK: ## %bb.0:
18741874
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
1875-
; CHECK-NEXT: vmovdqa %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
1876-
; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xda]
1875+
; CHECK-NEXT: vmovdqa %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
1876+
; CHECK-NEXT: vpermt2w %xmm2, %xmm1, %xmm3 ## encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda]
18771877
; CHECK-NEXT: vpermi2w %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
18781878
; CHECK-NEXT: vpaddw %xmm3, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
18791879
; CHECK-NEXT: retq ## encoding: [0xc3]
@@ -1889,8 +1889,8 @@ define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16
18891889
; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
18901890
; CHECK: ## %bb.0:
18911891
; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
1892-
; CHECK-NEXT: vmovdqa %ymm1, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
1893-
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm3 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xda]
1892+
; CHECK-NEXT: vmovdqa %ymm0, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
1893+
; CHECK-NEXT: vpermt2w %ymm2, %ymm1, %ymm3 ## encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda]
18941894
; CHECK-NEXT: vpermi2w %ymm2, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
18951895
; CHECK-NEXT: vpaddw %ymm3, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
18961896
; CHECK-NEXT: retq ## encoding: [0xc3]

0 commit comments

Comments
 (0)