@@ -334,6 +334,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
334
334
// Similar to AVX512_maskable_3src but in this case the input VT for the tied
335
335
// operand differs from the output VT. This requires a bitconvert on
336
336
// the preserved vector going into the vselect.
337
+ // NOTE: The unmasked pattern is disabled: null_frag is passed in place of RHS.
337
338
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
338
339
X86VectorVTInfo InVT,
339
340
dag Outs, dag NonTiedIns, string OpcodeStr,
@@ -343,7 +344,7 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
343
344
!con((ins InVT.RC:$src1), NonTiedIns),
344
345
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
345
346
!con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
346
- OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS ,
347
+ OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag) ,
347
348
(vselect InVT.KRCWM:$mask, RHS,
348
349
(bitconvert InVT.RC:$src1)),
349
350
vselect, "", IsCommutable>;
@@ -1719,17 +1720,19 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
1719
1720
multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
1720
1721
X86FoldableSchedWrite sched,
1721
1722
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1722
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1723
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1724
+ hasSideEffects = 0 in {
1723
1725
defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
1724
1726
(ins _.RC:$src2, _.RC:$src3),
1725
1727
OpcodeStr, "$src3, $src2", "$src2, $src3",
1726
- (_.VT (X86VPermi2X IdxVT .RC:$src1, _ .RC:$src2 , _.RC:$src3)), 1>,
1728
+ (_.VT (X86VPermt2 _ .RC:$src2, IdxVT .RC:$src1 , _.RC:$src3)), 1>,
1727
1729
EVEX_4V, AVX5128IBase, Sched<[sched]>;
1728
1730
1731
+ let mayLoad = 1 in
1729
1732
defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1730
1733
(ins _.RC:$src2, _.MemOp:$src3),
1731
1734
OpcodeStr, "$src3, $src2", "$src2, $src3",
1732
- (_.VT (X86VPermi2X IdxVT .RC:$src1, _ .RC:$src2 ,
1735
+ (_.VT (X86VPermt2 _ .RC:$src2, IdxVT .RC:$src1 ,
1733
1736
(_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
1734
1737
EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>;
1735
1738
}
@@ -1738,13 +1741,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
1738
1741
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
1739
1742
X86FoldableSchedWrite sched,
1740
1743
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
1741
- let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
1744
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain,
1745
+ hasSideEffects = 0, mayLoad = 1 in
1742
1746
defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
1743
1747
(ins _.RC:$src2, _.ScalarMemOp:$src3),
1744
1748
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
1745
1749
!strconcat("$src2, ${src3}", _.BroadcastStr ),
1746
- (_.VT (X86VPermi2X IdxVT .RC:$src1 ,
1747
- _ .RC:$src2 ,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1750
+ (_.VT (X86VPermt2 _ .RC:$src2 ,
1751
+ IdxVT .RC:$src1 ,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>,
1748
1752
AVX5128IBase, EVEX_4V, EVEX_B,
1749
1753
Sched<[sched.Folded, ReadAfterLd]>;
1750
1754
}
@@ -1806,21 +1810,22 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _,
1806
1810
X86VectorVTInfo IdxVT,
1807
1811
X86VectorVTInfo CastVT> {
1808
1812
def : Pat<(_.VT (vselect _.KRCWM:$mask,
1809
- (X86VPermi2X (IdxVT .VT (bitconvert (CastVT.VT _.RC:$src1)) ),
1810
- (_ .VT _.RC:$src2 ), _.RC:$src3),
1813
+ (X86VPermt2 (_ .VT _.RC:$src2 ),
1814
+ (IdxVT.VT (bitconvert (CastVT .VT _.RC:$src1)) ), _.RC:$src3),
1811
1815
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1812
1816
(!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask,
1813
1817
_.RC:$src2, _.RC:$src3)>;
1814
1818
def : Pat<(_.VT (vselect _.KRCWM:$mask,
1815
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1816
- _.RC:$src2, (_.LdFrag addr:$src3)),
1819
+ (X86VPermt2 _.RC:$src2,
1820
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))),
1821
+ (_.LdFrag addr:$src3)),
1817
1822
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1818
1823
(!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask,
1819
1824
_.RC:$src2, addr:$src3)>;
1820
1825
def : Pat<(_.VT (vselect _.KRCWM:$mask,
1821
- (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))) ,
1822
- _.RC:$src2 ,
1823
- (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1826
+ (X86VPermt2 _.RC:$src2 ,
1827
+ (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))) ,
1828
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
1824
1829
(_.VT (bitconvert (CastVT.VT _.RC:$src1))))),
1825
1830
(!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask,
1826
1831
_.RC:$src2, addr:$src3)>;
0 commit comments