[LoongArch] Implement COPY instruction between CFRs #69300

wangleiat · 2023-10-17T08:38:47Z

With this patch, all CFRs can be used for register allocation.

llvmbot · 2023-10-17T08:39:56Z

@llvm/pr-subscribers-backend-loongarch

Author: wanglei (wangleiat)

Changes

With this patch, all CFRs can be used for register allocation.

Full diff: https://github.com/llvm/llvm-project/pull/69300.diff

12 Files Affected:

(modified) llvm/lib/Target/LoongArch/LoongArch.h (+2)
(modified) llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp (+121)
(modified) llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td (+17)
(modified) llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td (+12)
(modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp (+6)
(modified) llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp (-7)
(modified) llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp (+1)
(modified) llvm/test/CodeGen/LoongArch/O0-pipeline.ll (+1)
(added) llvm/test/CodeGen/LoongArch/cfr-copy.mir (+34)
(added) llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir (+26)
(modified) llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir (+6-7)
(modified) llvm/test/CodeGen/LoongArch/opt-pipeline.ll (+1)

diff --git a/llvm/lib/Target/LoongArch/LoongArch.h b/llvm/lib/Target/LoongArch/LoongArch.h
index 05f4ac8c925584c..09ca089c91151bc 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.h
+++ b/llvm/lib/Target/LoongArch/LoongArch.h
@@ -36,9 +36,11 @@ bool lowerLoongArchMachineOperandToMCOperand(const MachineOperand &MO,
 FunctionPass *createLoongArchExpandAtomicPseudoPass();
 FunctionPass *createLoongArchISelDag(LoongArchTargetMachine &TM);
 FunctionPass *createLoongArchPreRAExpandPseudoPass();
+FunctionPass *createLoongArchExpandPseudoPass();
 void initializeLoongArchDAGToDAGISelPass(PassRegistry &);
 void initializeLoongArchExpandAtomicPseudoPass(PassRegistry &);
 void initializeLoongArchPreRAExpandPseudoPass(PassRegistry &);
+void initializeLoongArchExpandPseudoPass(PassRegistry &);
 } // end namespace llvm
 
 #endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCH_H
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index dd0b2cfde544baf..8ee0ba753494311 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -29,6 +29,8 @@ using namespace llvm;
 
 #define LOONGARCH_PRERA_EXPAND_PSEUDO_NAME                                     \
   "LoongArch Pre-RA pseudo instruction expansion pass"
+#define LOONGARCH_EXPAND_PSEUDO_NAME                                           \
+  "LoongArch pseudo instruction expansion pass"
 
 namespace {
 
@@ -513,15 +515,134 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
   return true;
 }
 
+class LoongArchExpandPseudo : public MachineFunctionPass {
+public:
+  const LoongArchInstrInfo *TII;
+  static char ID;
+
+  LoongArchExpandPseudo() : MachineFunctionPass(ID) {
+    initializeLoongArchExpandPseudoPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return LOONGARCH_EXPAND_PSEUDO_NAME;
+  }
+
+private:
+  bool expandMBB(MachineBasicBlock &MBB);
+  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
+  bool expandCopyCFR(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                     MachineBasicBlock::iterator &NextMBBI);
+};
+
+char LoongArchExpandPseudo::ID = 0;
+
+bool LoongArchExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  TII =
+      static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+  bool Modified = false;
+  for (auto &MBB : MF)
+    Modified |= expandMBB(MBB);
+
+  return Modified;
+}
+
+bool LoongArchExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+  bool Modified = false;
+
+  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+  while (MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool LoongArchExpandPseudo::expandMI(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     MachineBasicBlock::iterator &NextMBBI) {
+  switch (MBBI->getOpcode()) {
+  case LoongArch::PseudoCopyCFR:
+    return expandCopyCFR(MBB, MBBI, NextMBBI);
+  }
+
+  return false;
+}
+
+bool LoongArchExpandPseudo::expandCopyCFR(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineFunction *MF = MBB.getParent();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+
+  // Expand:
+  // MBB:
+  //    fcmp.caf.s  $dst, $fa0, $fa0 # set $dst 0(false)
+  //    bceqz $src, SinkMBB
+  // FalseBB:
+  //    fcmp.cueq.s $dst, $fa0, $fa0 # set $dst 1(true)
+  // SinkBB:
+  //    fallthrough
+
+  const BasicBlock *LLVM_BB = MBB.getBasicBlock();
+  auto *FalseBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  auto *SinkBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+  MF->insert(++MBB.getIterator(), FalseBB);
+  MF->insert(++FalseBB->getIterator(), SinkBB);
+
+  Register DestReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  // DestReg = 0
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::SET_CFR_FALSE), DestReg);
+  // Insert branch instruction.
+  BuildMI(MBB, MBBI, DL, TII->get(LoongArch::BCEQZ))
+      .addReg(SrcReg)
+      .addMBB(SinkBB);
+  // DestReg = 1
+  BuildMI(FalseBB, DL, TII->get(LoongArch::SET_CFR_TRUE), DestReg);
+
+  FalseBB->addSuccessor(SinkBB);
+
+  SinkBB->splice(SinkBB->end(), &MBB, MI, MBB.end());
+  SinkBB->transferSuccessors(&MBB);
+
+  MBB.addSuccessor(FalseBB);
+  MBB.addSuccessor(SinkBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+
+  // Make sure live-ins are correctly attached to this new basic block.
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *FalseBB);
+  computeAndAddLiveIns(LiveRegs, *SinkBB);
+
+  return true;
+}
+
 } // end namespace
 
 INITIALIZE_PASS(LoongArchPreRAExpandPseudo, "loongarch-prera-expand-pseudo",
                 LOONGARCH_PRERA_EXPAND_PSEUDO_NAME, false, false)
 
+INITIALIZE_PASS(LoongArchExpandPseudo, "loongarch-expand-pseudo",
+                LOONGARCH_EXPAND_PSEUDO_NAME, false, false)
+
 namespace llvm {
 
 FunctionPass *createLoongArchPreRAExpandPseudoPass() {
   return new LoongArchPreRAExpandPseudo();
 }
+FunctionPass *createLoongArchExpandPseudoPass() {
+  return new LoongArchExpandPseudo();
+}
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index d4d8736ec0caaea..2a62844578a7cdc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -126,6 +126,23 @@ def PseudoST_CFR : Pseudo<(outs),
 let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
 def PseudoLD_CFR : Pseudo<(outs CFR:$ccd),
                           (ins GPR:$rj, grlenimm:$imm)>;
+
+// SET_CFR_{FALSE,TRUE}
+// These instructions are defined to avoid expensive-check errors when
+// expanding the `PseudoCopyCFR` instruction.
+// fcmp.caf.s $dst, $fa0, $fa0
+def SET_CFR_FALSE : SET_CFR<0x0c100000, "fcmp.caf.s">;
+// fcmp.cueq.s $dst, $fa0, $fa0
+def SET_CFR_TRUE  : SET_CFR<0x0c160000, "fcmp.cueq.s">;
+
+// Pseudo instruction for COPY CFRs.
+def PseudoCopyCFR : Pseudo<(outs CFR:$dst), (ins CFR:$src)> {
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let Size = 12;
+}
+
 } // Predicates = [HasBasicF]
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
index f853fca5c8b6757..f66f620ca8b26d4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
@@ -218,3 +218,15 @@ class FP_STORE_2RI12<bits<32> op, RegisterClass rc = FPR32>
     : FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12),
                  "$fd, $rj, $imm12">;
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 1
+
+// This class is used to define `SET_CFR_{FALSE,TRUE}` instructions which are
+// used to expand `PseudoCopyCFR`.
+class SET_CFR<bits<32> op, string opcstr>
+    : FP_CMP<op> {
+  let isCodeGenOnly = 1;
+  let fj = 0; // fa0
+  let fk = 0; // fa0
+  let AsmString = opcstr # "\t$cd, $$fa0, $$fa0";
+  let OutOperandList = (outs CFR:$cd);
+  let InOperandList = (ins);
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index 9fad3377a8fd842..a89902f95d88172 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -77,6 +77,12 @@ void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
         .addReg(SrcReg, getKillRegState(KillSrc));
     return;
   }
+  // CFR->CFR copy.
+  if (LoongArch::CFRRegClass.contains(DstReg, SrcReg)) {
+    BuildMI(MBB, MBBI, DL, get(LoongArch::PseudoCopyCFR), DstReg)
+        .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
+  }
 
   // FPR->FPR copies.
   unsigned Opc;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
index 4037c4d370bb8e4..257b947a3ce4365 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp
@@ -98,13 +98,6 @@ LoongArchRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   if (TFI->hasBP(MF))
     markSuperRegs(Reserved, LoongArchABI::getBPReg()); // bp
 
-  // FIXME: To avoid generating COPY instructions between CFRs, only use $fcc0.
-  // This is required to work around the fact that COPY instruction between CFRs
-  // is not provided in LoongArch.
-  if (MF.getSubtarget<LoongArchSubtarget>().hasBasicF())
-    for (size_t Reg = LoongArch::FCC1; Reg <= LoongArch::FCC7; ++Reg)
-      markSuperRegs(Reserved, Reg);
-
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index c54a9b9c76c45b6..a5a4d78aceeef0c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -180,6 +180,7 @@ LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const {
 void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
 
 void LoongArchPassConfig::addPreEmitPass2() {
+  addPass(createLoongArchExpandPseudoPass());
   // Schedule the expansion of AtomicPseudos at the last possible moment,
   // avoiding the possibility for other passes to break the requirements for
   // forward progress in the LL/SC block.
diff --git a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
index 327e461eb69a98c..84d235d78eb9e59 100644
--- a/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/O0-pipeline.ll
@@ -69,6 +69,7 @@
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
+; CHECK-NEXT:       LoongArch pseudo instruction expansion pass
 ; CHECK-NEXT:       LoongArch atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
diff --git a/llvm/test/CodeGen/LoongArch/cfr-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-copy.mir
new file mode 100644
index 000000000000000..4224c99081bca17
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/cfr-copy.mir
@@ -0,0 +1,34 @@
+# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+# RUN: llc --mtriple=loongarch64 --mattr=+d %s -o - | FileCheck %s
+
+## Check the expansion of the PseudoCopyCFR instruction.
+
+--- |
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
+  target triple = "loongarch64"
+
+  define void @test() {
+  ; CHECK-LABEL: test:
+  ; CHECK:       # %bb.0:
+  ; CHECK-NEXT:    fcmp.caf.s $fcc1, $fa0, $fa0
+  ; CHECK-NEXT:    bceqz $fcc0, .LBB0_2
+  ; CHECK-NEXT:  # %bb.1:
+  ; CHECK-NEXT:    fcmp.cueq.s $fcc1, $fa0, $fa0
+  ; CHECK-NEXT:  .LBB0_2:
+  ; CHECK-NEXT:    movcf2gr $a0, $fcc1
+  ; CHECK-NEXT:    ret
+    ret void
+  }
+...
+---
+name: test
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $fcc0
+
+    $fcc1 = COPY $fcc0
+    $r4 = COPY $fcc1
+    PseudoRET implicit killed $r4
+
+...
diff --git a/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
new file mode 100644
index 000000000000000..c5a6da72389f47f
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/cfr-pseudo-copy.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc --mtriple=loongarch64 --mattr=+d --stop-after=postrapseudos %s \
+# RUN:     -o - | FileCheck %s
+
+## Check the COPY instruction between CFRs.
+## A pseudo (PseudoCopyCFR) is generated after postrapseudos pass.
+
+...
+---
+name: test
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $fcc0
+
+    ; CHECK-LABEL: name: test
+    ; CHECK: liveins: $fcc0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $fcc1 = PseudoCopyCFR $fcc0
+    ; CHECK-NEXT: $r4 = MOVCF2GR killed $fcc1
+    ; CHECK-NEXT: PseudoRET implicit killed $r4
+    $fcc1 = COPY $fcc0
+    $r4 = COPY $fcc1
+    PseudoRET implicit killed $r4
+
+...
diff --git a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
index fa5fccb1a5ba188..18dbc5ca2e123ec 100644
--- a/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
+++ b/llvm/test/CodeGen/LoongArch/inline-asm-clobbers-fcc.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc --mtriple=loongarch64 --mattr=+d --run-pass=greedy %s -o - | FileCheck %s
+# RUN: llc --mtriple=loongarch64 --mattr=+d --regalloc=fast \
+# RUN:     --stop-before=postra-machine-sink %s -o - | FileCheck %s
 
 ## Check that fcc register clobbered by inlineasm is correctly saved by examing
 ## a pair of pseudos (PseudoST_CFR and PseudoLD_CFR) are generated before and
@@ -15,13 +16,11 @@ body:             |
     ; CHECK-LABEL: name: test
     ; CHECK: liveins: $f0_64, $f1_64
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $f1_64
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr64 = COPY $f0_64
-    ; CHECK-NEXT: [[FCMP_CLT_D:%[0-9]+]]:cfr = FCMP_CLT_D [[COPY]], [[COPY1]]
-    ; CHECK-NEXT: PseudoST_CFR [[FCMP_CLT_D]], %stack.0, 0 :: (store (s64) into %stack.0)
+    ; CHECK-NEXT: renamable $fcc0 = FCMP_CLT_D renamable $f1_64, renamable $f0_64
+    ; CHECK-NEXT: PseudoST_CFR $fcc0, %stack.0, 0 :: (store (s64) into %stack.0)
     ; CHECK-NEXT: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $fcc0
-    ; CHECK-NEXT: [[PseudoLD_CFR:%[0-9]+]]:cfr = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
-    ; CHECK-NEXT: $r4 = COPY [[PseudoLD_CFR]]
+    ; CHECK-NEXT: $fcc0 = PseudoLD_CFR %stack.0, 0 :: (load (s64) from %stack.0)
+    ; CHECK-NEXT: $r4 = COPY killed renamable $fcc0
     ; CHECK-NEXT: PseudoRET implicit killed $r4
     %1:fpr64 = COPY $f1_64
     %0:fpr64 = COPY $f0_64
diff --git a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
index 8b1d635b605b32a..3134d940545e800 100644
--- a/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
+++ b/llvm/test/CodeGen/LoongArch/opt-pipeline.ll
@@ -165,6 +165,7 @@
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter
 ; CHECK-NEXT:       Stack Frame Layout Analysis
+; CHECK-NEXT:       LoongArch pseudo instruction expansion pass
 ; CHECK-NEXT:       LoongArch atomic pseudo instruction expansion pass
 ; CHECK-NEXT:       Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT:       Machine Optimization Remark Emitter

wangleiat · 2023-10-17T08:46:09Z

@xen0n @xry111

llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp

xen0n · 2023-10-17T08:57:14Z

llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

                          (ins GPR:$rj, grlenimm:$imm)>;
+
+// SET_CFR_{FALSE,TRUE}
+// These instructions are defined in order to avoid expensive check error when


What error is caused by not defining these shortcut nodes? Maybe you mean "... in order to avoid expensive checks if regular instruction patterns are used"?

If regular instructions are used, the following error is generated:

*** Bad machine code: Using an undefined physical register *** - function: test - basic block: %bb.0 (0x8deaa0) - instruction: $fcc1 = FCMP_CAF_S $f0, $f0 - operand 2: $f0

Hmm that seems something to fix... maybe later. (I don't have much free time recently for that, unfortunately.)

llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp

xen0n · 2023-10-17T08:59:20Z

llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp

+
+  // Expand:
+  // MBB:
+  //    fcmp.caf.s  $dst, $fa0, $fa0 # set $dst 0(false)


I don't know if movgr2cf $dst, $zero would be better micro-architecture-wise, perhaps you know better?

FWIW, GCC uses movgr2cf %0,$r0 for zeroing an fcc. But I'm not sure which is better micro-architecture-wise either.

Some tests show movgr2cf is slower than other insns, but not sure about fcmp.caf.s.

Phew:

$ cat t.S
.globl main
main:
    li.w $a0, 1000000
.L0:
.rept 100
#if USE_MOVGR2CF
    movgr2cf $fcc0, $r0
#else
    fcmp.caf.s $fcc0, $f0, $f0
#endif
.endr
    addi.w $a0, $a0, -1
    bnez $a0, .L0
    li.w $a0, 0
    jr $ra
$ gcc t.S -DUSE_MOVGR2CF
$ time ./a.out
real 0m0.688s
user 0m0.687s
sys 0m0.001s
$ gcc t.S
$ time ./a.out
real 0m0.024s
user 0m0.023s
sys 0m0.000s

So fcmp.caf.s is indeed better...

Then maybe documenting this finding would be beneficial (and GCC could use some micro-optimization too)!

(And you could inform the HW team to add short-circuiting for movgr2cf *, $zero so it doesn't naïvely go to the ALU unconditionally (which I expect to be the reason for the slowdown) in that case. So we can have the semantically-equivalent patterns execute at the same speed without surprises on future models...)

Thanks! Co-authored-by: WÁNG Xuěruì <[email protected]>

xen0n

LGTM, thanks!

During the review of a LLVM change [1], on LA464 we found that zeroing a fcc with fcmp.caf.s is much faster than a movgr2cf from $r0. [1]: llvm/llvm-project#69300 gcc/ChangeLog: * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for zeroing a fcc.

@xen0n

Address @xen0n's comments.

During the review of an LLVM change [1], on LA464 we found that zeroing an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0. [1]: llvm/llvm-project#69300 gcc/ChangeLog: * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for zeroing a fcc.

With this patch, all CFRs can be used for register allocation. (cherry picked from commit 271087e)

During the review of an LLVM change [1], on LA464 we found that zeroing an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0. [1]: llvm/llvm-project#69300 gcc/ChangeLog: * config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for zeroing a fcc.

[LoongArch] Implement COPY instruction between CFRs

e001613

With this patch, all CFRs can be used for register allocation.

wangleiat requested a review from SixWeining October 17, 2023 08:39

llvmbot added the backend:loongarch label Oct 17, 2023

wangleiat requested a review from heiher October 17, 2023 08:44

xen0n reviewed Oct 17, 2023

View reviewed changes

wangleiat and others added 3 commits October 17, 2023 17:29

Update llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp

bb5b25c

Thanks! Co-authored-by: WÁNG Xuěruì <[email protected]>

Update llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td

63dfee9

Thanks! Co-authored-by: WÁNG Xuěruì <[email protected]>

Update llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp

f322789

Thanks! Co-authored-by: WÁNG Xuěruì <[email protected]>

xen0n approved these changes Oct 17, 2023

View reviewed changes

heiher approved these changes Oct 17, 2023

View reviewed changes

SixWeining approved these changes Oct 17, 2023

View reviewed changes

Update LoongArchFloat32InstrInfo.td

aba3a1c

Address @xen0n's comments.

wangleiat merged commit 271087e into llvm:main Oct 19, 2023

wangleiat deleted the cfr branch October 19, 2023 02:01

madhur13490 mentioned this pull request Oct 20, 2023

Revert commit ba8565fbcb975e2d067ce3ae5a7dbaae4953edd3 madhur13490/llvm-project#3

Closed

banach-space mentioned this pull request Oct 24, 2023

[mlir][vector] Add scalable vectors to tests for vector.contract #70039

Merged

tru pushed a commit that referenced this pull request Oct 27, 2023

[LoongArch] Implement COPY instruction between CFRs (#69300)

4b7f415

With this patch, all CFRs can be used for register allocation. (cherry picked from commit 271087e)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[LoongArch] Implement COPY instruction between CFRs #69300

[LoongArch] Implement COPY instruction between CFRs #69300

Uh oh!

wangleiat commented Oct 17, 2023

Uh oh!

llvmbot commented Oct 17, 2023

Uh oh!

wangleiat commented Oct 17, 2023

Uh oh!

Uh oh!

xen0n Oct 17, 2023

Uh oh!

wangleiat Oct 17, 2023

Uh oh!

xen0n Oct 17, 2023

Uh oh!

Uh oh!

Uh oh!

xen0n Oct 17, 2023

Uh oh!

xry111 Oct 17, 2023

Uh oh!

SixWeining Oct 17, 2023

Uh oh!

xry111 Oct 17, 2023

Uh oh!

xen0n Oct 17, 2023

Uh oh!

xen0n Oct 17, 2023

Uh oh!

xen0n left a comment

Uh oh!

Uh oh!

[LoongArch] Implement COPY instruction between CFRs #69300

[LoongArch] Implement COPY instruction between CFRs #69300

Uh oh!

Conversation

wangleiat commented Oct 17, 2023

Uh oh!

llvmbot commented Oct 17, 2023

Uh oh!

wangleiat commented Oct 17, 2023

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

xen0n left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!