[X86] Support lowering for APX promoted BMI instructions. #77433
Conversation
@llvm/pr-subscribers-backend-x86

Author: None (XinWang10)

Changes: R16-R31 were added to the GPRs in #70958. This patch supports lowering for the APX promoted BMI instructions in EVEX space; encoding/decoding support landed in #73899. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4

Patch is 97.06 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/77433.diff

12 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 73b10cf3067e1a..8c8dfee23d4b9d 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -4087,14 +4087,17 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
SDValue Control;
unsigned ROpc, MOpc;
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
if (!PreferBEXTR) {
assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then.");
// If we can't make use of BEXTR then we can't fuse shift+mask stages.
// Let's perform the mask first, and apply shift later. Note that we need to
// widen the mask to account for the fact that we'll apply shift afterwards!
Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT);
- ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr;
- MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rr)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BZHI64rm)
+ : GET_EGPR_IF_ENABLED(X86::BZHI32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
} else {
@@ -4109,12 +4112,15 @@ MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) {
} else {
assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then.");
// BMI requires the immediate to placed in a register.
- ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr;
- MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm;
+ ROpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rr)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rr);
+ MOpc = NVT == MVT::i64 ? GET_EGPR_IF_ENABLED(X86::BEXTR64rm)
+ : GET_EGPR_IF_ENABLED(X86::BEXTR32rm);
unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri;
Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0);
}
}
+#undef GET_EGPR_IF_ENABLED
MachineSDNode *NewNode;
SDValue Input = N0->getOperand(0);
@@ -5482,26 +5488,32 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty();
switch (NVT.SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
+#define GET_EGPR_IF_ENABLED(OPC) Subtarget->hasEGPR() ? OPC##_EVEX : OPC
case MVT::i32:
- Opc = UseMULXHi ? X86::MULX32Hrr :
- UseMULX ? X86::MULX32rr :
- IsSigned ? X86::IMUL32r : X86::MUL32r;
- MOpc = UseMULXHi ? X86::MULX32Hrm :
- UseMULX ? X86::MULX32rm :
- IsSigned ? X86::IMUL32m : X86::MUL32m;
+ Opc = UseMULXHi ? X86::MULX32Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rr)
+ : IsSigned ? X86::IMUL32r
+ : X86::MUL32r;
+ MOpc = UseMULXHi ? X86::MULX32Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX32rm)
+ : IsSigned ? X86::IMUL32m
+ : X86::MUL32m;
LoReg = UseMULX ? X86::EDX : X86::EAX;
HiReg = X86::EDX;
break;
case MVT::i64:
- Opc = UseMULXHi ? X86::MULX64Hrr :
- UseMULX ? X86::MULX64rr :
- IsSigned ? X86::IMUL64r : X86::MUL64r;
- MOpc = UseMULXHi ? X86::MULX64Hrm :
- UseMULX ? X86::MULX64rm :
- IsSigned ? X86::IMUL64m : X86::MUL64m;
+ Opc = UseMULXHi ? X86::MULX64Hrr
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rr)
+ : IsSigned ? X86::IMUL64r
+ : X86::MUL64r;
+ MOpc = UseMULXHi ? X86::MULX64Hrm
+ : UseMULX ? GET_EGPR_IF_ENABLED(X86::MULX64rm)
+ : IsSigned ? X86::IMUL64m
+ : X86::MUL64m;
LoReg = UseMULX ? X86::RDX : X86::RAX;
HiReg = X86::RDX;
break;
+#undef GET_EGPR_IF_ENABLED
}
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 5cfa95e085e34a..9c9c387154f267 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1296,7 +1296,7 @@ defm ANDN32 : AndN<Xi32, "_EVEX">, EVEX, Requires<[HasBMI, HasEGPR, In64BitMode]
defm ANDN64 : AndN<Xi64, "_EVEX">, EVEX, REX_W, Requires<[HasBMI, HasEGPR, In64BitMode]>;
}
-let Predicates = [HasBMI], AddedComplexity = -6 in {
+let Predicates = [HasBMI, NoEGPR], AddedComplexity = -6 in {
def : Pat<(and (not GR32:$src1), GR32:$src2),
(ANDN32rr GR32:$src1, GR32:$src2)>;
def : Pat<(and (not GR64:$src1), GR64:$src2),
@@ -1307,6 +1307,17 @@ let Predicates = [HasBMI], AddedComplexity = -6 in {
(ANDN64rm GR64:$src1, addr:$src2)>;
}
+let Predicates = [HasBMI, HasEGPR], AddedComplexity = -6 in {
+ def : Pat<(and (not GR32:$src1), GR32:$src2),
+ (ANDN32rr_EVEX GR32:$src1, GR32:$src2)>;
+ def : Pat<(and (not GR64:$src1), GR64:$src2),
+ (ANDN64rr_EVEX GR64:$src1, GR64:$src2)>;
+ def : Pat<(and (not GR32:$src1), (loadi32 addr:$src2)),
+ (ANDN32rm_EVEX GR32:$src1, addr:$src2)>;
+ def : Pat<(and (not GR64:$src1), (loadi64 addr:$src2)),
+ (ANDN64rm_EVEX GR64:$src1, addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// MULX Instruction
//
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index c77c77ee4a3eeb..671094128e9b8c 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1845,7 +1845,7 @@ def : Pat<(fshl GR64:$src1, GR64:$src2, (shiftMask64 CL)),
def : Pat<(fshr GR64:$src2, GR64:$src1, (shiftMask64 CL)),
(SHRD64rrCL GR64:$src1, GR64:$src2)>;
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
let AddedComplexity = 1 in {
def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
(SARX32rr GR32:$src1,
@@ -1903,6 +1903,64 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, (shiftMask32 GR8:$src2)),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, (shiftMask64 GR8:$src2)),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), (shiftMask32 GR8:$src2)),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), (shiftMask64 GR8:$src2)),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
// Use BTR/BTS/BTC for clearing/setting/toggling a bit in a variable location.
multiclass one_bit_patterns<RegisterClass RC, ValueType VT, Instruction BTR,
Instruction BTS, Instruction BTC,
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 97c625a64cfc0b..0fb332fc5d6cf2 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1241,7 +1241,7 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}
-let Predicates = [HasBMI] in {
+let Predicates = [HasBMI, NoEGPR] in {
// FIXME(1): patterns for the load versions are not implemented
// FIXME(2): By only matching `add_su` and `ineg_su` we may emit
// extra `mov` instructions if `src` has future uses. It may be better
@@ -1278,6 +1278,39 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
+let Predicates = [HasBMI, HasEGPR] in {
+ def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr_EVEX GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr_EVEX GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr_EVEX GR64:$src)>;
+
+ def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr_EVEX GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr_EVEX GR64:$src)>;
+}
+
multiclass Bmi4VOp3<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node,
X86FoldableSchedWrite sched, string Suffix = ""> {
let SchedRW = [sched], Form = MRMSrcReg4VOp3 in
@@ -1324,7 +1357,7 @@ def AndMask64 : ImmLeaf<i64, [{
}]>;
// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI, NoBMI2, NoTBM] in {
+let Predicates = [HasBMI, NoBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BEXTR64rr GR64:$src,
(SUBREG_TO_REG (i64 0),
@@ -1335,8 +1368,19 @@ let Predicates = [HasBMI, NoBMI2, NoTBM] in {
(MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
}
+let Predicates = [HasBMI, NoBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BEXTR64rr_EVEX GR64:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BEXTR64rm_EVEX addr:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+}
+
// Use BZHI for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI2, NoTBM] in {
+let Predicates = [HasBMI2, NoTBM, NoEGPR] in {
def : Pat<(and GR64:$src, AndMask64:$mask),
(BZHI64rr GR64:$src,
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -1347,6 +1391,17 @@ let Predicates = [HasBMI2, NoTBM] in {
(MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
}
+let Predicates = [HasBMI2, NoTBM, HasEGPR] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BZHI64rr_EVEX GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BZHI64rm_EVEX addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+}
+
multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
X86MemOperand x86memop, SDPatternOperator OpNode,
PatFrag ld_frag, string Suffix = ""> {
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index f951894db1890c..e225fe6950e3da 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -879,7 +879,7 @@ let Predicates = [HasBMI2, HasEGPR, In64BitMode] in {
defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem, "_EVEX">, T8, PD, REX_W, EVEX;
}
-let Predicates = [HasBMI2] in {
+let Predicates = [HasBMI2, NoEGPR] in {
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
@@ -983,6 +983,86 @@ let Predicates = [HasBMI2] in {
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
+let Predicates = [HasBMI2, HasEGPR] in {
+ let AddedComplexity = 10 in {
+ def : Pat<(rotr GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, imm:$shamt)>;
+ def : Pat<(rotr GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, imm:$shamt)>;
+
+ def : Pat<(rotl GR32:$src, (i8 imm:$shamt)),
+ (RORX32ri_EVEX GR32:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl GR64:$src, (i8 imm:$shamt)),
+ (RORX64ri_EVEX GR64:$src, (ROT64L2R_imm8 imm:$shamt))>;
+ }
+
+ def : Pat<(rotr (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, imm:$shamt)>;
+ def : Pat<(rotr (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, imm:$shamt)>;
+
+ def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)),
+ (RORX32mi_EVEX addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
+ def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
+ (RORX64mi_EVEX addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
+
+ let AddedComplexity = 1 in {
+ def : Pat<(sra GR32:$src1, GR8:$src2),
+ (SARX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra GR64:$src1, GR8:$src2),
+ (SARX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl GR32:$src1, GR8:$src2),
+ (SHRX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl GR64:$src1, GR8:$src2),
+ (SHRX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl GR32:$src1, GR8:$src2),
+ (SHLX32rr_EVEX GR32:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl GR64:$src1, GR8:$src2),
+ (SHLX64rr_EVEX GR64:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ }
+
+ def : Pat<(sra (loadi32 addr:$src1), GR8:$src2),
+ (SARX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(sra (loadi64 addr:$src1), GR8:$src2),
+ (SARX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(srl (loadi32 addr:$src1), GR8:$src2),
+ (SHRX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(srl (loadi64 addr:$src1), GR8:$src2),
+ (SHRX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+ def : Pat<(shl (loadi32 addr:$src1), GR8:$src2),
+ (SHLX32rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+ def : Pat<(shl (loadi64 addr:$src1), GR8:$src2),
+ (SHLX64rm_EVEX addr:$src1,
+ (INSERT_SUBREG
+ (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+}
+
def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
(ROL8ri GR8:$src1, relocImm:$src2)>;
def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
diff --git a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
index d704f38307fcb8..5b7bb1ca97b5ca 100644
--- a/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/bmi-intrinsics-fast-isel-x86_64.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+bmi,+egpr --show-mc-encoding | FileCheck %s --check-prefix=EGPR
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/bmi-builtins.c
@@ -14,6 +15,13 @@ define i64 @test__andn_u64(i64 %a0, i64 %a1) {
; X64-NEXT: xorq $-1, %rax
; X64-NEXT: andq %rsi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__andn_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; EGPR-NEXT: xorq $-1, %rax # encoding: [0x48,0x83,0xf0,0xff]
+; EGPR-NEXT: andq %rsi, %rax # encoding: [0x48,0x21,0xf0]
+; EGPR-NEXT: retq # encoding: [0xc3]
%xor = xor i64 %a0, -1
%res = and i64 %xor, %a1
ret i64 %res
@@ -24,6 +32,11 @@ define i64 @test__bextr_u64(i64 %a0, i64 %a1) {
; X64: # %bb.0:
; X64-NEXT: bextrq %rsi, %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__bextr_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: bextrq %rsi, %rdi, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xc8,0xf7,0xc7]
+; EGPR-NEXT: retq # encoding: [0xc3]
%res = call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
ret i64 %res
}
@@ -35,6 +48,13 @@ define i64 @test__blsi_u64(i64 %a0) {
; X64-NEXT: subq %rdi, %rax
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsi_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; EGPR-NEXT: subq %rdi, %rax # encoding: [0x48,0x29,0xf8]
+; EGPR-NEXT: andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
+; EGPR-NEXT: retq # encoding: [0xc3]
%neg = sub i64 0, %a0
%res = and i64 %a0, %neg
ret i64 %res
@@ -46,6 +66,12 @@ define i64 @test__blsmsk_u64(i64 %a0) {
; X64-NEXT: leaq -1(%rdi), %rax
; X64-NEXT: xorq %rdi, %rax
; X64-NEXT: retq
+;
+; EGPR-LABEL: test__blsmsk_u64:
+; EGPR: # %bb.0:
+; EGPR-NEXT: leaq -1(%rdi), %rax # encoding: [0x48,0x8d,0x47,0xff]
+; EGPR-NEXT: xorq %rdi, %rax # encoding: [0x48,0x31,0xf8]
+;...
[truncated]
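Stepping back from the diff: every C++ change above follows the same pattern — when the subtarget has extended GPRs, select the EVEX-promoted opcode, since the legacy VEX encoding cannot address R16-R31. Below is a standalone sketch of that selection logic. The opcode names mirror the real ones from the diff, but the enum values and helper function are illustrative only; the actual LLVM code uses the GET_EGPR_IF_ENABLED macro shown in X86ISelDAGToDAG.cpp.

```cpp
// Illustrative only: mock opcode values standing in for the generated
// X86 opcode enum. With EGPR available, the _EVEX form is chosen so the
// instruction can encode the new R16-R31 registers.
#include <cstdio>

namespace X86 {
enum Opcode : unsigned { BZHI32rr, BZHI32rr_EVEX, BZHI64rr, BZHI64rr_EVEX };
} // namespace X86

static unsigned selectBZHIOpc(bool Is64Bit, bool HasEGPR) {
  if (Is64Bit)
    return HasEGPR ? X86::BZHI64rr_EVEX : X86::BZHI64rr;
  return HasEGPR ? X86::BZHI32rr_EVEX : X86::BZHI32rr;
}

int main() {
  // On an APX target (HasEGPR == true) the EVEX-promoted opcode wins.
  std::printf("64-bit, EGPR:    %u\n", selectBZHIOpc(true, true));
  std::printf("32-bit, no EGPR: %u\n", selectBZHIOpc(false, false));
  return 0;
}
```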
Force-pushed from 1ab1928 to e2feb9a.
(style) use lowercase for tblgen defs
@@ -1241,43 +1241,49 @@ let Predicates = [HasBMI, In64BitMode], Defs = [EFLAGS] in {
  defm BLSI64 : Bls<"blsi", MRM3r, MRM3m, Xi64, "_EVEX">, EVEX;
}

let Predicates = [HasBMI] in {
multiclass Bls_Pats<string suffix> {
bls_pats
…X86InstrShiftRotate.td This patch extracts the NFC changes in #77433 into a separate commit.
…X86InstrCompiler.td This patch extracts the NFC changes in #77433 into a separate commit.
Needs rebase.
Force-pushed from 554a143 to ad0965f.
LGTM
@XinWang10 This is causing failures on EXPENSIVE_CHECKS builds - please can you take a look?
Error message:

```
*** Bad machine code: Illegal virtual register for instruction ***
- function:    test__blsi_u32
- basic block: %bb.0 (0x7a61208)
- instruction: %5:gr32 = MOV32r0 implicit-def $eflags
- operand 0:   %5:gr32
Expected a GR32_NOREX2 register, but got a GR32 register
```

Reported by RKSimon in #77433. The failure occurs because the compiler emits a MOV32r0 with a GR32 operand when fast-isel is enabled:

```
// X86FastISel.cpp
Register SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
```

However, before this patch, the compiler only allowed a GR32_NOREX2 operand because MOV32r0 is a pseudo instruction. This patch relaxes the operand's register class to GR32, because MOV32r0 is always expanded to XOR32rr, which can use EGPR. The bug was not introduced by #77433 but was caught by it.
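For context, here is a self-contained sketch of the invariant the MachineVerifier enforces here. It models register classes as plain sets rather than LLVM's TargetRegisterClass machinery, so the register sets below are mock data, not the real class contents: an operand's required register class must admit every register the virtual register could be assigned.

```cpp
// Illustrative only: GR32_NOREX2 modeled as a strict subset of GR32.
#include <cstdio>
#include <set>
#include <string>

using RegClass = std::set<std::string>;

// GR32 includes the APX extended registers; GR32_NOREX2 excludes them.
static const RegClass GR32 = {"eax", "ecx", "edx", "r16d", "r31d"};
static const RegClass GR32_NOREX2 = {"eax", "ecx", "edx"};

// The operand is legal iff the required class admits every register
// the vreg's class could receive.
static bool operandClassOK(const RegClass &Required, const RegClass &VReg) {
  for (const std::string &R : VReg)
    if (!Required.count(R))
      return false;
  return true;
}

int main() {
  // Pre-fix: MOV32r0 required GR32_NOREX2, but fast-isel supplied GR32.
  std::printf("pre-fix:  %s\n",
              operandClassOK(GR32_NOREX2, GR32) ? "ok" : "bad machine code");
  // Post-fix: the operand class is relaxed to GR32.
  std::printf("post-fix: %s\n",
              operandClassOK(GR32, GR32) ? "ok" : "bad machine code");
  return 0;
}
```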
R16-R31 were added to the GPRs in llvm#70958. This patch supports lowering for the promoted BMI instructions in EVEX space; encoding/decoding support landed in llvm#73899. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
…X86InstrShiftRotate.td This patch extracts the NFC changes in llvm#77433 into a separate commit.
…X86InstrCompiler.td This patch extracts the NFC changes in llvm#77433 into a separate commit.
R16-R31 were added to the GPRs in #70958.
This patch supports lowering for the promoted BMI instructions in EVEX space; encoding/decoding support landed in #73899.
RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4