-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[LoongArch] Implement COPY instruction between CFRs #69300
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
With this patch, all CFRs can be used for register allocation.
@llvm/pr-subscribers-backend-loongarch Author: wanglei (wangleiat) Changes: With this patch, all CFRs can be used for register allocation. Full diff: https://github.com/llvm/llvm-project/pull/69300.diff 12 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
index 05f4ac8c925584c..09ca089c91151bc 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -36,9 +36,11 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
FunctionPass *createLoongArchExpandAtomicPseudoPass();
FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
FunctionPass *createLoongArchPreRAExpandPseudoPass();
+FunctionPass *createLoongArchExpandPseudoPass();
void initializeLoongArchDAGToDAGISelPass(PassRegistry &);
void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &);
+void initializeLoongArchExpandPseudoPass(PassRegistry &);
} // end namespace llvm
#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index dd0b2cfde544baf..8ee0ba753494311 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -29,6 +29,8 @@ using namespace llvm;
#define LOONGARCH_PRERA_EXPAND_PSEUDO_NAME \
"LoongArch Pre-RA pseudo instruction expansion pass"
+#define LOONGARCH_EXPAND_PSEUDO_NAME \
+ "LoongArch pseudo instruction expansion pass"
namespace {
@@ -513,15 +515,134 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
return true;
}
+class LoongArchExpandPseudo : public MachineFunctionPass {
+public:
+ const LoongArchInstrInfo *TII;
+ static char ID;
+
+ LoongArchExpandPseudo() : MachineFunctionPass(ID) {
+ initializeLoongArchExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return LOONGARCH_EXPAND_PSEUDO_NAME;
+ }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+};
+
+char LoongArchExpandPseudo::ID = 0;
+
+bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII =
+ static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+
+ return Modified;
+}
+
+bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ switch (MBBI->getOpcode()) {
+ case LoongArch::PseudoCopyCFR:
+ return expandCopyCFR(MBB, MBBI, NextMBBI);
+ }
+
+ return false;
+}
+
+bool LoongArchExpandPseudo::expandCopyCFR(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction *MF = MBB.getParent();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Expand:
+ // MBB:
+ // fcmp.caf.s $dst, $fa0, $fa0 # set $dst 0(false)
+ // bceqz $src, SinkBB
+ // FalseBB:
+ // fcmp.cueq.s $dst, $fa0, $fa0 # set $dst 1(true)
+ // SinkBB:
+ // fallthrough
+
+ const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+ auto *FalseBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ auto *SinkBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MF->insert(++MBB.getIterator(), FalseBB);
+ MF->insert(++FalseBB->getIterator(), SinkBB);
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ // DestReg = 0
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::SET_CFR_FALSE), DestReg);
+ // Insert branch instruction.
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::BCEQZ))
+ .addReg(SrcReg)
+ .addMBB(SinkBB);
+ // DestReg = 1
+ BuildMI(FalseBB, DL, TII->get(LoongArch::SET_CFR_TRUE), DestReg);
+
+ FalseBB->addSuccessor(SinkBB);
+
+ SinkBB->splice(SinkBB->end(), &MBB, MI, MBB.end());
+ SinkBB->transferSuccessors(&MBB);
+
+ MBB.addSuccessor(FalseBB);
+ MBB.addSuccessor(SinkBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+
+ // Make sure live-ins are correctly attached to the new basic blocks.
+ LivePhysRegs LiveRegs;
+ computeAndAddLiveIns(LiveRegs, *FalseBB);
+ computeAndAddLiveIns(LiveRegs, *SinkBB);
+
+ return true;
+}
+
} // end namespace
INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo",
LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false)
+INITIALIZE_PASS(LoongArchExpandPseudo, "loongarch-expand-pseudo",
+ LOONGARCH_EXPAND_PSEUDO_NAME, false, false)
+
namespace llvm {
FunctionPass *createLoongArchPreRAExpandPseudoPass() {
return new LoongArchPreRAExpandPseudo();
}
+FunctionPass *createLoongArchExpandPseudoPass() {
+ return new LoongArchExpandPseudo();
+}
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index d4d8736ec0caaea..2a62844578a7cdc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -126,6 +126,23 @@ def PseudoST_CFR : Pseudo<(outs),
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
(ins GPR:$rj, grlenimm:$imm)>;
+
+// SET_CFR_{FALSE,TRUE}
+// These instructions are defined to avoid the expensive-checks error that
+// regular instruction patterns cause when expanding `PseudoCopyCFR`.
+// fcmp.caf.s $dst, $fa0, $fa0
+def SET_CFR_FALSE : SET_CFR<0x0c100000, "fcmp.caf.s">;
+// fcmp.cueq.s $dst, $fa0, $fa0
+def SET_CFR_TRUE : SET_CFR<0x0c160000, "fcmp.cueq.s">;
+
+// Pseudo instruction for COPY CFRs.
+def PseudoCopyCFR : Pseudo<(outs CFR:$dst), (ins CFR:$src)> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Size = 12;
+}
+
} // Predicates = [HasBasicF]
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
index f853fca5c8b6757..f66f620ca8b26d4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
@@ -218,3 +218,15 @@ class FP_STORE_2RI12<bits<32> op, RegisterClass rc = FPR32>
: FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12),
"$fd, $rj, $imm12">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
+
+// This class is used to define `SET_CFR_{FALSE,TRUE}` instructions which are
+// used to expand `PseudoCopyCFR`.
+class SET_CFR<bits<32> op, string opcstr>
+ : FP_CMP<op> {
+ let isCodeGenOnly = 1;
+ let fj = 0; // fa0
+ let fk = 0; // fa0
+ let AsmString = opcstr # "\t$cd, $$fa0, $$fa0";
+ let OutOperandList = (outs CFR:$cd);
+ let InOperandList = (ins);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 9fad3377a8fd842..a89902f95d88172 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -77,6 +77,12 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
+ // CFR->CFR copy.
+ if (LoongArch::CFRRegClass.contains(DstReg, SrcReg)) {
+ BuildMI(MBB, MBBI, DL, get(LoongArch::PseudoCopyCFR), DstReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
// FPR->FPR copies.
unsigned Opc;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
index 4037c4d370bb8e4..257b947a3ce4365 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -98,13 +98,6 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasBP(MF))
markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
- // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
- // This is required to work around the fact that COPY instruction between CFRs
- // is not provided in LoongArch.
- if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
- for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg)
- markSuperRegs(Reserved, Reg);
-
assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index c54a9b9c76c45b6..a5a4d78aceeef0c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -180,6 +180,7 @@ LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const {
void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
void LoongArchPassConfig::addPreEmitPass2() {
+ addPass(createLoongArchExpandPseudoPass());
// Schedule the expansion of AtomicPseudos at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LL/SC block.
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
index 327e461eb69a98c..84d235d78eb9e59 100644
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -69,6 +69,7 @@
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
+; CHECK-NEXT: LoongArch pseudo instruction expansion pass
; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
diff --git a/llvm/test/CodeGen/LoongArch/cfr-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-copy.mir
new file mode 100644
index 000000000000000..4224c99081bca17
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/cfr-copy.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+# RUN: llc --mtriple=loongarch64 --mattr=+d %s -o - | FileCheck %s
+
+## Check the expansion of the PseudoCopyCFR instruction.
+
+--- |
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+ target triple = "loongarch64"
+
+ define void @test() {
+ ; CHECK-LABEL: test:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: fcmp.caf.s $fcc1, $fa0, $fa0
+ ; CHECK-NEXT: bceqz $fcc0, .LBB0_2
+ ; CHECK-NEXT: # %bb.1:
+ ; CHECK-NEXT: fcmp.cueq.s $fcc1, $fa0, $fa0
+ ; CHECK-NEXT: .LBB0_2:
+ ; CHECK-NEXT: movcf2gr $a0, $fcc1
+ ; CHECK-NEXT: ret
+ ret void
+ }
+...
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $fcc0
+
+ $fcc1 = COPY $fcc0
+ $r4 = COPY $fcc1
+ PseudoRET implicit killed $r4
+
+...
diff --git a/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
new file mode 100644
index 000000000000000..c5a6da72389f47f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc --mtriple=loongarch64 --mattr=+d --stop-after=postrapseudos %s \
+# RUN: -o - | FileCheck %s
+
+## Check the COPY instruction between CFRs.
+## A pseudo (PseudoCopyCFR) is generated after postrapseudos pass.
+
+...
+---
+name: test
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $fcc0
+
+ ; CHECK-LABEL: name: test
+ ; CHECK: liveins: $fcc0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $fcc1 = PseudoCopyCFR $fcc0
+ ; CHECK-NEXT: $r4 = MOVCF2GR killed $fcc1
+ ; CHECK-NEXT: PseudoRET implicit killed $r4
+ $fcc1 = COPY $fcc0
+ $r4 = COPY $fcc1
+ PseudoRET implicit killed $r4
+
+...
diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
index fa5fccb1a5ba188..18dbc5ca2e123ec 100644
--- a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
+++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s
+# RUN: llc --mtriple=loongarch64 --mattr=+d --regalloc=fast \
+# RUN: --stop-before=postra-machine-sink %s -o - | FileCheck %s
## Check that fcc register clobbered by inlineasm is correctly saved by examing
## a pair of pseudos (PseudoST_CFR and PseudoLD_CFR) are generated before and
@@ -15,13 +16,11 @@ body: |
; CHECK-LABEL: name: test
; CHECK: liveins: $f0_64, $f1_64
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64
- ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]]
- ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0)
+ ; CHECK-NEXT: renamable $fcc0 = FCMP_CLT_D renamable $f1_64, renamable $f0_64
+ ; CHECK-NEXT: PseudoST_CFR $fcc0, %stack.0, 0 :: (store (s64) into %stack.0)
; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0
- ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
- ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]]
+ ; CHECK-NEXT: $fcc0 = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
+ ; CHECK-NEXT: $r4 = COPY killed renamable $fcc0
; CHECK-NEXT: PseudoRET implicit killed $r4
%1:fpr64 = COPY $f1_64
%0:fpr64 = COPY $f0_64
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 8b1d635b605b32a..3134d940545e800 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -165,6 +165,7 @@
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
; CHECK-NEXT: Stack Frame Layout Analysis
+; CHECK-NEXT: LoongArch pseudo instruction expansion pass
; CHECK-NEXT: LoongArch atomic pseudo instruction expansion pass
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Machine Optimization Remark Emitter
|
(ins GPR:$rj, grlenimm:$imm)>; | ||
|
||
// SET_CFR_{FALSE,TRUE} | ||
// These instructions are defined in order to avoid expensive check error when |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What error is caused by not defining these shortcut nodes? Maybe you mean "... in order to avoid expensive checks if regular instruction patterns are used"?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using the regular instruction generates the following error:
*** Bad machine code: Using an undefined physical register ***
- function: test
- basic block: %bb.0 (0x8deaa0)
- instruction: $fcc1 = FCMP_CAF_S $f0, $f0
- operand 2: $f0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm that seems something to fix... maybe later. (I don't have much free time recently for that, unfortunately.)
|
||
// Expand: | ||
// MBB: | ||
// fcmp.caf.s $dst, $fa0, $fa0 # set $dst 0(false) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know if `movgr2cf $dst, $zero` would be better micro-architecture-wise; perhaps you know better?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW, GCC uses `movgr2cf %0,$r0` for zeroing an fcc, but I'm not sure which is better micro-architecture-wise either.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some tests show `movgr2cf` is slower than other insns, but I'm not sure about `fcmp.caf.s`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Phew:
$ cat t.S
.globl main
main:
li.w $a0, 1000000
.L0:
.rept 100
#if USE_MOVGR2CF
movgr2cf $fcc0, $r0
#else
fcmp.caf.s $fcc0, $f0, $f0
#endif
.endr
addi.w $a0, $a0, -1
bnez $a0, .L0
li.w $a0, 0
jr $ra
$ gcc t.S -DUSE_MOVGR2CF
$ time ./a.out
real 0m0.688s
user 0m0.687s
sys 0m0.001s
$ gcc t.S
$ time ./a.out
real 0m0.024s
user 0m0.023s
sys 0m0.000s
So `fcmp.caf.s` is indeed better...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Then maybe documenting this finding would be beneficial (and GCC could use some micro-optimization too)!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(And you could inform the HW team to add short-circuiting for `movgr2cf *, $zero` so it doesn't naïvely go to the ALU unconditionally (which I expect to be the reason for the slowdown) in that case. So we can have the semantically-equivalent patterns execute at the same speed without surprises on future models...)
Thanks! Co-authored-by: WÁNG Xuěruì <[email protected]>
Thanks! Co-authored-by: WÁNG Xuěruì <[email protected]>
Thanks! Co-authored-by: WÁNG Xuěruì <[email protected]>
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
During the review of a LLVM change [1], on LA464 we found that zeroing a fcc with fcmp.caf.s is much faster than a movgr2cf from $r0. [1]: llvm/llvm-project#69300 gcc/ChangeLog: * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for zeroing a fcc.
Address @xen0n's comments.
During the review of an LLVM change [1], on LA464 we found that zeroing an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0. [1]: llvm/llvm-project#69300 gcc/ChangeLog: * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for zeroing a fcc.
With this patch, all CFRs can be used for register allocation. (cherry picked from commit 271087e)
During the review of an LLVM change [1], on LA464 we found that zeroing an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0. [1]: llvm/llvm-project#69300 gcc/ChangeLog: * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for zeroing a fcc.
With this patch, all CFRs can be used for register allocation.