Skip to content

Commit 511ba45

Browse files
authored
[X86][MC][CodeGen] Support EGPR for KMOV (#73781)
KMOV is essential for copying between k-registers and GPRs. R16-R31 were added to the GPRs in #70958, so we extend KMOV to support these new registers first. This patch: 1. Promotes KMOV instructions from VEX space to EVEX space. 2. Emits the {evex} prefix for the EVEX variants. 3. Prefers the EVEX variants over the VEX variants in ISEL and optimizations for better register allocation (RA). EVEX variants will be compressed to VEX variants by the existing EVEX2VEX pass if no EGPR is used. RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4 TAG: llvm-test-suite && CPU2017 can be built with feature egpr successfully.
1 parent 5891a8f commit 511ba45

14 files changed

+593
-53
lines changed

llvm/lib/Target/X86/MCTargetDesc/X86InstPrinterCommon.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,8 @@ void X86InstPrinterCommon::printInstFlags(const MCInst *MI, raw_ostream &O,
377377
O << "\t{vex2}";
378378
else if (Flags & X86::IP_USE_VEX3)
379379
O << "\t{vex3}";
380-
else if (Flags & X86::IP_USE_EVEX)
380+
else if ((Flags & X86::IP_USE_EVEX) ||
381+
(TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitEVEXPrefix)
381382
O << "\t{evex}";
382383

383384
if (Flags & X86::IP_USE_DISP8)

llvm/lib/Target/X86/X86DomainReassignment.cpp

+19-13
Original file line numberDiff line numberDiff line change
@@ -619,16 +619,22 @@ void X86DomainReassignment::initConverters() {
619619
std::make_unique<InstrReplacerDstCOPY>(From, To);
620620
};
621621

622-
createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
623-
createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
622+
bool HasEGPR = STI->hasEGPR();
623+
createReplacerDstCOPY(X86::MOVZX32rm16,
624+
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
625+
createReplacerDstCOPY(X86::MOVZX64rm16,
626+
HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
624627

625628
createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
626629
createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
627630

628631
if (STI->hasDQI()) {
629-
createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
630-
createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
631-
createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
632+
createReplacerDstCOPY(X86::MOVZX16rm8,
633+
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
634+
createReplacerDstCOPY(X86::MOVZX32rm8,
635+
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
636+
createReplacerDstCOPY(X86::MOVZX64rm8,
637+
HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
632638

633639
createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
634640
createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
@@ -639,8 +645,8 @@ void X86DomainReassignment::initConverters() {
639645
Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
640646
};
641647

642-
createReplacer(X86::MOV16rm, X86::KMOVWkm);
643-
createReplacer(X86::MOV16mr, X86::KMOVWmk);
648+
createReplacer(X86::MOV16rm, HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm);
649+
createReplacer(X86::MOV16mr, HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
644650
createReplacer(X86::MOV16rr, X86::KMOVWkk);
645651
createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
646652
createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
@@ -650,11 +656,11 @@ void X86DomainReassignment::initConverters() {
650656
createReplacer(X86::XOR16rr, X86::KXORWrr);
651657

652658
if (STI->hasBWI()) {
653-
createReplacer(X86::MOV32rm, X86::KMOVDkm);
654-
createReplacer(X86::MOV64rm, X86::KMOVQkm);
659+
createReplacer(X86::MOV32rm, HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm);
660+
createReplacer(X86::MOV64rm, HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm);
655661

656-
createReplacer(X86::MOV32mr, X86::KMOVDmk);
657-
createReplacer(X86::MOV64mr, X86::KMOVQmk);
662+
createReplacer(X86::MOV32mr, HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
663+
createReplacer(X86::MOV64mr, HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
658664

659665
createReplacer(X86::MOV32rr, X86::KMOVDkk);
660666
createReplacer(X86::MOV64rr, X86::KMOVQkk);
@@ -695,8 +701,8 @@ void X86DomainReassignment::initConverters() {
695701

696702
createReplacer(X86::AND8rr, X86::KANDBrr);
697703

698-
createReplacer(X86::MOV8rm, X86::KMOVBkm);
699-
createReplacer(X86::MOV8mr, X86::KMOVBmk);
704+
createReplacer(X86::MOV8rm, HasEGPR ? X86::KMOVBkm_EVEX : X86::KMOVBkm);
705+
createReplacer(X86::MOV8mr, HasEGPR ? X86::KMOVBmk_EVEX : X86::KMOVBmk);
700706
createReplacer(X86::MOV8rr, X86::KMOVBkk);
701707

702708
createReplacer(X86::NOT8r, X86::KNOTBrr);

llvm/lib/Target/X86/X86ExpandPseudo.cpp

+13-6
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,7 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
264264
MachineInstr &MI = *MBBI;
265265
unsigned Opcode = MI.getOpcode();
266266
const DebugLoc &DL = MBBI->getDebugLoc();
267+
bool HasEGPR = STI->hasEGPR();
267268
switch (Opcode) {
268269
default:
269270
return false;
@@ -466,10 +467,14 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
466467
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
467468
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
468469

469-
auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
470-
.addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
471-
auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWkm))
472-
.addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
470+
auto MIBLo =
471+
BuildMI(MBB, MBBI, DL,
472+
TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
473+
.addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
474+
auto MIBHi =
475+
BuildMI(MBB, MBBI, DL,
476+
TII->get(HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm))
477+
.addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));
473478

474479
for (int i = 0; i < X86::AddrNumOperands; ++i) {
475480
MIBLo.add(MBBI->getOperand(1 + i));
@@ -500,8 +505,10 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
500505
Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
501506
Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);
502507

503-
auto MIBLo = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
504-
auto MIBHi = BuildMI(MBB, MBBI, DL, TII->get(X86::KMOVWmk));
508+
auto MIBLo = BuildMI(MBB, MBBI, DL,
509+
TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
510+
auto MIBHi = BuildMI(MBB, MBBI, DL,
511+
TII->get(HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk));
505512

506513
for (int i = 0; i < X86::AddrNumOperands; ++i) {
507514
MIBLo.add(MBBI->getOperand(i));

llvm/lib/Target/X86/X86InstrAVX512.td

+45-25
Original file line numberDiff line numberDiff line change
@@ -2853,46 +2853,56 @@ defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, E
28532853
// - copy from GPR to mask register and vice versa
28542854
//
28552855
multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk,
2856-
string OpcodeStr, RegisterClass KRC,
2857-
ValueType vvt, X86MemOperand x86memop> {
2856+
string OpcodeStr, RegisterClass KRC, ValueType vvt,
2857+
X86MemOperand x86memop, string Suffix = ""> {
2858+
let explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
28582859
let isMoveReg = 1, hasSideEffects = 0, SchedRW = [WriteMove] in
2859-
def kk : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2860-
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2861-
Sched<[WriteMove]>;
2862-
def km : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2863-
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2864-
[(set KRC:$dst, (vvt (load addr:$src)))]>,
2865-
Sched<[WriteLoad]>;
2866-
def mk : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2867-
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2868-
[(store KRC:$src, addr:$dst)]>,
2869-
Sched<[WriteStore]>;
2860+
def kk#Suffix : I<opc_kk, MRMSrcReg, (outs KRC:$dst), (ins KRC:$src),
2861+
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2862+
Sched<[WriteMove]>;
2863+
def km#Suffix : I<opc_km, MRMSrcMem, (outs KRC:$dst), (ins x86memop:$src),
2864+
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2865+
[(set KRC:$dst, (vvt (load addr:$src)))]>,
2866+
Sched<[WriteLoad]>;
2867+
def mk#Suffix : I<opc_mk, MRMDestMem, (outs), (ins x86memop:$dst, KRC:$src),
2868+
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
2869+
[(store KRC:$src, addr:$dst)]>,
2870+
Sched<[WriteStore]>;
2871+
}
28702872
}
28712873

28722874
multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk,
2873-
string OpcodeStr,
2874-
RegisterClass KRC, RegisterClass GRC> {
2875-
let hasSideEffects = 0 in {
2876-
def kr : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2877-
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2878-
Sched<[WriteMove]>;
2879-
def rk : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2880-
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2881-
Sched<[WriteMove]>;
2875+
string OpcodeStr, RegisterClass KRC,
2876+
RegisterClass GRC, string Suffix = ""> {
2877+
let hasSideEffects = 0, explicitOpPrefix = !if(!eq(Suffix, ""), NoExplicitOpPrefix, ExplicitEVEX) in {
2878+
def kr#Suffix : I<opc_kr, MRMSrcReg, (outs KRC:$dst), (ins GRC:$src),
2879+
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2880+
Sched<[WriteMove]>;
2881+
def rk#Suffix : I<opc_rk, MRMSrcReg, (outs GRC:$dst), (ins KRC:$src),
2882+
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>,
2883+
Sched<[WriteMove]>;
28822884
}
28832885
}
28842886

2885-
let Predicates = [HasDQI] in
2887+
let Predicates = [HasDQI, NoEGPR] in
28862888
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem>,
28872889
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32>,
28882890
VEX, PD;
2891+
let Predicates = [HasDQI, HasEGPR, In64BitMode] in
2892+
defm KMOVB : avx512_mask_mov<0x90, 0x90, 0x91, "kmovb", VK8, v8i1, i8mem, "_EVEX">,
2893+
avx512_mask_mov_gpr<0x92, 0x93, "kmovb", VK8, GR32, "_EVEX">,
2894+
EVEX, PD;
28892895

2890-
let Predicates = [HasAVX512] in
2896+
let Predicates = [HasAVX512, NoEGPR] in
28912897
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem>,
28922898
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32>,
28932899
VEX, PS;
2900+
let Predicates = [HasAVX512, HasEGPR, In64BitMode] in
2901+
defm KMOVW : avx512_mask_mov<0x90, 0x90, 0x91, "kmovw", VK16, v16i1, i16mem, "_EVEX">,
2902+
avx512_mask_mov_gpr<0x92, 0x93, "kmovw", VK16, GR32, "_EVEX">,
2903+
EVEX, PS;
28942904

2895-
let Predicates = [HasBWI] in {
2905+
let Predicates = [HasBWI, NoEGPR] in {
28962906
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
28972907
VEX, PD, REX_W;
28982908
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
@@ -2902,6 +2912,16 @@ let Predicates = [HasBWI] in {
29022912
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
29032913
VEX, XD, REX_W;
29042914
}
2915+
let Predicates = [HasBWI, HasEGPR, In64BitMode] in {
2916+
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem, "_EVEX">,
2917+
EVEX, PD, REX_W;
2918+
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32, "_EVEX">,
2919+
EVEX, XD;
2920+
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem, "_EVEX">,
2921+
EVEX, PS, REX_W;
2922+
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64, "_EVEX">,
2923+
EVEX, XD, REX_W;
2924+
}
29052925

29062926
// GR from/to mask register
29072927
def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),

llvm/lib/Target/X86/X86InstrInfo.cpp

+26-7
Original file line numberDiff line numberDiff line change
@@ -495,10 +495,12 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
495495
return false;
496496
case X86::MOV8rm:
497497
case X86::KMOVBkm:
498+
case X86::KMOVBkm_EVEX:
498499
MemBytes = 1;
499500
return true;
500501
case X86::MOV16rm:
501502
case X86::KMOVWkm:
503+
case X86::KMOVWkm_EVEX:
502504
case X86::VMOVSHZrm:
503505
case X86::VMOVSHZrm_alt:
504506
MemBytes = 2;
@@ -511,6 +513,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
511513
case X86::VMOVSSZrm:
512514
case X86::VMOVSSZrm_alt:
513515
case X86::KMOVDkm:
516+
case X86::KMOVDkm_EVEX:
514517
MemBytes = 4;
515518
return true;
516519
case X86::MOV64rm:
@@ -524,6 +527,7 @@ static bool isFrameLoadOpcode(int Opcode, unsigned &MemBytes) {
524527
case X86::MMX_MOVD64rm:
525528
case X86::MMX_MOVQ64rm:
526529
case X86::KMOVQkm:
530+
case X86::KMOVQkm_EVEX:
527531
MemBytes = 8;
528532
return true;
529533
case X86::MOVAPSrm:
@@ -593,10 +597,12 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
593597
return false;
594598
case X86::MOV8mr:
595599
case X86::KMOVBmk:
600+
case X86::KMOVBmk_EVEX:
596601
MemBytes = 1;
597602
return true;
598603
case X86::MOV16mr:
599604
case X86::KMOVWmk:
605+
case X86::KMOVWmk_EVEX:
600606
case X86::VMOVSHZmr:
601607
MemBytes = 2;
602608
return true;
@@ -605,6 +611,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
605611
case X86::VMOVSSmr:
606612
case X86::VMOVSSZmr:
607613
case X86::KMOVDmk:
614+
case X86::KMOVDmk_EVEX:
608615
MemBytes = 4;
609616
return true;
610617
case X86::MOV64mr:
@@ -616,6 +623,7 @@ static bool isFrameStoreOpcode(int Opcode, unsigned &MemBytes) {
616623
case X86::MMX_MOVQ64mr:
617624
case X86::MMX_MOVNTQmr:
618625
case X86::KMOVQmk:
626+
case X86::KMOVQmk_EVEX:
619627
MemBytes = 8;
620628
return true;
621629
case X86::MOVAPSmr:
@@ -3519,6 +3527,7 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
35193527
const X86Subtarget &Subtarget) {
35203528
bool HasAVX = Subtarget.hasAVX();
35213529
bool HasAVX512 = Subtarget.hasAVX512();
3530+
bool HasEGPR = Subtarget.hasEGPR();
35223531

35233532
// SrcReg(MaskReg) -> DestReg(GR64)
35243533
// SrcReg(MaskReg) -> DestReg(GR32)
@@ -3527,10 +3536,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
35273536
if (X86::VK16RegClass.contains(SrcReg)) {
35283537
if (X86::GR64RegClass.contains(DestReg)) {
35293538
assert(Subtarget.hasBWI());
3530-
return X86::KMOVQrk;
3539+
return HasEGPR ? X86::KMOVQrk_EVEX : X86::KMOVQrk;
35313540
}
35323541
if (X86::GR32RegClass.contains(DestReg))
3533-
return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
3542+
return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDrk_EVEX : X86::KMOVDrk)
3543+
: (HasEGPR ? X86::KMOVWrk_EVEX : X86::KMOVWrk);
35343544
}
35353545

35363546
// SrcReg(GR64) -> DestReg(MaskReg)
@@ -3540,10 +3550,11 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
35403550
if (X86::VK16RegClass.contains(DestReg)) {
35413551
if (X86::GR64RegClass.contains(SrcReg)) {
35423552
assert(Subtarget.hasBWI());
3543-
return X86::KMOVQkr;
3553+
return HasEGPR ? X86::KMOVQkr_EVEX : X86::KMOVQkr;
35443554
}
35453555
if (X86::GR32RegClass.contains(SrcReg))
3546-
return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
3556+
return Subtarget.hasBWI() ? (HasEGPR ? X86::KMOVDkr_EVEX : X86::KMOVDkr)
3557+
: (HasEGPR ? X86::KMOVWkr_EVEX : X86::KMOVWkr);
35473558
}
35483559

35493560

@@ -3710,6 +3721,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
37103721
bool HasAVX = STI.hasAVX();
37113722
bool HasAVX512 = STI.hasAVX512();
37123723
bool HasVLX = STI.hasVLX();
3724+
bool HasEGPR = STI.hasEGPR();
37133725

37143726
assert(RC != nullptr && "Invalid target register class");
37153727
switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
@@ -3725,7 +3737,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
37253737
return Load ? X86::MOV8rm : X86::MOV8mr;
37263738
case 2:
37273739
if (X86::VK16RegClass.hasSubClassEq(RC))
3728-
return Load ? X86::KMOVWkm : X86::KMOVWmk;
3740+
return Load ? (HasEGPR ? X86::KMOVWkm_EVEX : X86::KMOVWkm)
3741+
: (HasEGPR ? X86::KMOVWmk_EVEX : X86::KMOVWmk);
37293742
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
37303743
return Load ? X86::MOV16rm : X86::MOV16mr;
37313744
case 4:
@@ -3743,7 +3756,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
37433756
return Load ? X86::LD_Fp32m : X86::ST_Fp32m;
37443757
if (X86::VK32RegClass.hasSubClassEq(RC)) {
37453758
assert(STI.hasBWI() && "KMOVD requires BWI");
3746-
return Load ? X86::KMOVDkm : X86::KMOVDmk;
3759+
return Load ? (HasEGPR ? X86::KMOVDkm_EVEX : X86::KMOVDkm)
3760+
: (HasEGPR ? X86::KMOVDmk_EVEX : X86::KMOVDmk);
37473761
}
37483762
// All of these mask pair classes have the same spill size, the same kind
37493763
// of kmov instructions can be used with all of them.
@@ -3774,7 +3788,8 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
37743788
return Load ? X86::LD_Fp64m : X86::ST_Fp64m;
37753789
if (X86::VK64RegClass.hasSubClassEq(RC)) {
37763790
assert(STI.hasBWI() && "KMOVQ requires BWI");
3777-
return Load ? X86::KMOVQkm : X86::KMOVQmk;
3791+
return Load ? (HasEGPR ? X86::KMOVQkm_EVEX : X86::KMOVQkm)
3792+
: (HasEGPR ? X86::KMOVQmk_EVEX : X86::KMOVQmk);
37783793
}
37793794
llvm_unreachable("Unknown 8-byte regclass");
37803795
case 10:
@@ -7717,9 +7732,13 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
77177732
case X86::VMOVDQA64Zrm:
77187733
case X86::VMOVDQU64Zrm:
77197734
case X86::KMOVBkm:
7735+
case X86::KMOVBkm_EVEX:
77207736
case X86::KMOVWkm:
7737+
case X86::KMOVWkm_EVEX:
77217738
case X86::KMOVDkm:
7739+
case X86::KMOVDkm_EVEX:
77227740
case X86::KMOVQkm:
7741+
case X86::KMOVQkm_EVEX:
77237742
return true;
77247743
}
77257744
};

llvm/lib/Target/X86/X86InstrInfo.td

+2-1
Original file line numberDiff line numberDiff line change
@@ -878,9 +878,10 @@ def relocImm : ComplexPattern<iAny, 1, "selectRelocImm",
878878
// X86 Instruction Predicate Definitions.
879879
def TruePredicate : Predicate<"true">;
880880

881+
def HasEGPR : Predicate<"Subtarget->hasEGPR()">;
882+
def NoEGPR : Predicate<"!Subtarget->hasEGPR()">;
881883
def HasCMOV : Predicate<"Subtarget->canUseCMOV()">;
882884
def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">;
883-
884885
def HasNOPL : Predicate<"Subtarget->hasNOPL()">;
885886
def HasMMX : Predicate<"Subtarget->hasMMX()">;
886887
def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f -show-mc-encoding | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+egpr -show-mc-encoding | FileCheck --check-prefix=EGPR %s
3+
4+
define void @kmov(i1 %cmp23.not) {
5+
; CHECK-LABEL: kmov:
6+
; CHECK: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
7+
;
8+
; EGPR-LABEL: kmov:
9+
; EGPR: kmovw %edi, %k1 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x92,0xcf]
10+
entry:
11+
%0 = select i1 %cmp23.not, double 1.000000e+00, double 0.000000e+00
12+
store double %0, ptr null, align 8
13+
ret void
14+
}

0 commit comments

Comments
 (0)