diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index 8bb9497a847fa..d643017b90db1 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -40,6 +40,9 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) { } switch (MI->getOpcode()) { + case TargetOpcode::STATEPOINT: + LowerSTATEPOINT(*MI); + return; case TargetOpcode::PATCHABLE_FUNCTION_ENTER: LowerPATCHABLE_FUNCTION_ENTER(*MI); return; @@ -146,6 +149,46 @@ bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } +void LoongArchAsmPrinter::LowerSTATEPOINT(const MachineInstr &MI) { + StatepointOpers SOpers(&MI); + if (unsigned PatchBytes = SOpers.getNumPatchBytes()) { + assert(PatchBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + emitNops(PatchBytes / 4); + } else { + // Lower call target and choose correct opcode. + const MachineOperand &CallTarget = SOpers.getCallTarget(); + MCOperand CallTargetMCOp; + switch (CallTarget.getType()) { + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + lowerOperand(CallTarget, CallTargetMCOp); + EmitToStreamer(*OutStreamer, + MCInstBuilder(LoongArch::BL).addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Immediate: + CallTargetMCOp = MCOperand::createImm(CallTarget.getImm()); + EmitToStreamer(*OutStreamer, + MCInstBuilder(LoongArch::BL).addOperand(CallTargetMCOp)); + break; + case MachineOperand::MO_Register: + CallTargetMCOp = MCOperand::createReg(CallTarget.getReg()); + EmitToStreamer(*OutStreamer, MCInstBuilder(LoongArch::JIRL) + .addReg(LoongArch::R1) + .addOperand(CallTargetMCOp) + .addImm(0)); + break; + default: + llvm_unreachable("Unsupported operand type in statepoint call target"); + break; + } + } + + auto &Ctx = OutStreamer->getContext(); + MCSymbol *MILabel = Ctx.createTempSymbol(); + OutStreamer->emitLabel(MILabel); + SM.recordStatepoint(*MILabel, MI); +} + void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER( const MachineInstr &MI) { const Function &F = MF->getFunction(); diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h index 9da90886627ec..fc12f1079490b 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h @@ -15,6 +15,7 @@ #include "LoongArchSubtarget.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/Compiler.h" @@ -41,6 +42,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchAsmPrinter : public AsmPrinter { bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS) override; + void LowerSTATEPOINT(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI); void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI); void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index cfd0ecb70a4e5..bfafb33175210 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -4588,6 +4588,20 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter( return emitPseudoXVINSGR2VR(MI, BB, Subtarget); case LoongArch::PseudoCTPOP: return emitPseudoCTPOP(MI, BB, Subtarget); + case TargetOpcode::STATEPOINT: + // STATEPOINT is a pseudo instruction which has no implicit defs/uses + // while bl call instruction (where statepoint will be lowered at the + // end) has implicit def. This def is early-clobber as it will be set at + // the moment of the call and earlier than any use is read. + // Add this implicit dead def here as a workaround. + MI.addOperand(*MI.getMF(), + MachineOperand::CreateReg( + LoongArch::R1, /*isDef*/ true, + /*isImp*/ true, /*isKill*/ false, /*isDead*/ true, + /*isUndef*/ false, /*isEarlyClobber*/ true)); + if (!Subtarget.is64Bit()) + report_fatal_error("STATEPOINT is only supported on 64-bit targets"); + return emitPatchPoint(MI, BB); } } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp index d1af65192ee61..a01f2ed3b5880 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/LoongArchMCTargetDesc.h" #include "MCTargetDesc/LoongArchMatInt.h" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/MC/MCInstBuilder.h" using namespace llvm; @@ -236,7 +237,25 @@ unsigned LoongArchInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const { const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI); } - return MI.getDesc().getSize(); + + unsigned NumBytes = 0; + const MCInstrDesc &Desc = MI.getDesc(); + + // Size should be preferably set in + // llvm/lib/Target/LoongArch/LoongArch*InstrInfo.td (default case). + // Specific cases handle instructions of variable sizes. + switch (Desc.getOpcode()) { + default: + return Desc.getSize(); + case TargetOpcode::STATEPOINT: + NumBytes = StatepointOpers(&MI).getNumPatchBytes(); + assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!"); + // No patch bytes means a normal call inst (i.e. `bl`) is emitted. + if (NumBytes == 0) + NumBytes = 4; + break; + } + return NumBytes; } bool LoongArchInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { diff --git a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll new file mode 100644 index 0000000000000..4a77b2c00f54c --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering-r1.ll @@ -0,0 +1,13 @@ +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs --stop-after=prologepilog < %s | FileCheck %s + +;; Check that STATEPOINT instruction has an early clobber implicit def for R1. + +define void @test() gc "statepoint-example" { +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" ()] +; CHECK: STATEPOINT 0, 0, 0, target-flags(loongarch-call-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, csr_ilp32s_lp64s, implicit-def $r3, implicit-def dead early-clobber $r1 + ret void +} + +declare void @return_i1() +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) diff --git a/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll new file mode 100644 index 0000000000000..6956929e721d7 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/statepoint-call-lowering.ll @@ -0,0 +1,228 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 -verify-machineinstrs < %s | FileCheck %s +;; A collection of basic functionality tests for statepoint lowering - most +;; interesting cornercases are exercised through the x86 tests. + +%struct = type { i64, i64 } + +declare zeroext i1 @return_i1() +declare zeroext i32 @return_i32() +declare ptr @return_i32ptr() +declare float @return_float() +declare %struct @return_struct() +declare void @varargf(i32, ...) + +define i1 @test_i1_return() nounwind gc "statepoint-example" { +;; This is just checking that a i1 gets lowered normally when there's no extra +;; state arguments to the statepoint +; CHECK-LABEL: test_i1_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_i1) +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +define i32 @test_i32_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_i32_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_i32) +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token) + ret i32 %call1 +} + +define ptr @test_i32ptr_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_i32ptr_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_i32ptr) +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(ptr ()) @return_i32ptr, i32 0, i32 0, i32 0, i32 0) + %call1 = call ptr @llvm.experimental.gc.result.p0(token %safepoint_token) + ret ptr %call1 +} + +define float @test_float_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_float_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_float) +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(float ()) @return_float, i32 0, i32 0, i32 0, i32 0) + %call1 = call float @llvm.experimental.gc.result.f32(token %safepoint_token) + ret float %call1 +} + +define %struct @test_struct_return() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_struct_return: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: bl %plt(return_struct) +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(%struct ()) @return_struct, i32 0, i32 0, i32 0, i32 0) + %call1 = call %struct @llvm.experimental.gc.result.struct(token %safepoint_token) + ret %struct %call1 +} + +define i1 @test_relocate(ptr addrspace(1) %a) nounwind gc "statepoint-example" { +; CHECK-LABEL: test_relocate: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: bl %plt(return_i1) +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a)] + %call1 = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call2 +} + +define void @test_void_vararg() nounwind gc "statepoint-example" { +; CHECK-LABEL: test_void_vararg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: ori $a0, $zero, 42 +; CHECK-NEXT: ori $a1, $zero, 43 +; CHECK-NEXT: bl %plt(varargf) +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (i32, ...)) @varargf, i32 2, i32 0, i32 42, i32 43, i32 0, i32 0) + ;; if we try to use the result from a statepoint wrapping a + ;; non-void-returning varargf, we will experience a crash. + ret void +} + +define i1 @test_i1_return_patchable() nounwind gc "statepoint-example" { +;; A patchable variant of test_i1_return +; CHECK-LABEL: test_i1_return_patchable: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -16 +; CHECK-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; CHECK-NEXT: nop +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 16 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 4, ptr elementtype(i1 ()) null, i32 0, i32 0, i32 0, i32 0) + %call1 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + ret i1 %call1 +} + +declare void @consume(ptr addrspace(1) %obj) + +define i1 @test_cross_bb(ptr addrspace(1) %a, i1 %external_cond) nounwind gc "statepoint-example" { +; CHECK-LABEL: test_cross_bb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; CHECK-NEXT: andi $fp, $a1, 1 +; CHECK-NEXT: st.d $a0, $sp, 8 +; CHECK-NEXT: bl %plt(return_i1) +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: beqz $fp, .LBB8_2 +; CHECK-NEXT: # %bb.1: # %left +; CHECK-NEXT: ld.d $a1, $sp, 8 +; CHECK-NEXT: move $fp, $a0 +; CHECK-NEXT: move $a0, $a1 +; CHECK-NEXT: bl %plt(consume) +; CHECK-NEXT: move $a0, $fp +; CHECK-NEXT: b .LBB8_3 +; CHECK-NEXT: .LBB8_2: # %right +; CHECK-NEXT: ori $a0, $zero, 1 +; CHECK-NEXT: .LBB8_3: # %right +; CHECK-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: ret +entry: + %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i1 ()) @return_i1, i32 0, i32 0, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %a)] + br i1 %external_cond, label %left, label %right + +left: + %call1 = call ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token %safepoint_token, i32 0, i32 0) + %call2 = call zeroext i1 @llvm.experimental.gc.result.i1(token %safepoint_token) + call void @consume(ptr addrspace(1) %call1) + ret i1 %call2 + +right: + ret i1 true +} + +%struct2 = type { i64, i64, i64 } + +declare void @consume_attributes(i32, ptr nest, i32, ptr byval(%struct2)) + +define void @test_attributes(ptr byval(%struct2) %s) nounwind gc "statepoint-example" { +; CHECK-LABEL: test_attributes: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi.d $sp, $sp, -32 +; CHECK-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; CHECK-NEXT: ld.d $a1, $a0, 16 +; CHECK-NEXT: st.d $a1, $sp, 16 +; CHECK-NEXT: ld.d $a1, $a0, 8 +; CHECK-NEXT: st.d $a1, $sp, 8 +; CHECK-NEXT: ld.d $a0, $a0, 0 +; CHECK-NEXT: st.d $a0, $sp, 0 +; CHECK-NEXT: ori $a0, $zero, 42 +; CHECK-NEXT: ori $a2, $zero, 17 +; CHECK-NEXT: addi.d $a3, $sp, 0 +; CHECK-NEXT: move $a1, $zero +; CHECK-NEXT: bl %plt(consume_attributes) +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; CHECK-NEXT: addi.d $sp, $sp, 32 +; CHECK-NEXT: ret +entry: +;; We call a function that has a nest argument and a byval argument. + %statepoint_token = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (i32, ptr, i32, ptr)) @consume_attributes, i32 4, i32 0, i32 42, ptr nest null, i32 17, ptr byval(%struct2) %s, i32 0, i32 0) + ret void +} + +declare token @llvm.experimental.gc.statepoint.p0(i64, i32, ptr, i32, i32, ...) +declare i1 @llvm.experimental.gc.result.i1(token) +declare i32 @llvm.experimental.gc.result.i32(token) +declare ptr @llvm.experimental.gc.result.p0(token) +declare float @llvm.experimental.gc.result.f32(token) +declare %struct @llvm.experimental.gc.result.struct(token) +declare ptr addrspace(1) @llvm.experimental.gc.relocate.p1(token, i32, i32) diff --git a/llvm/unittests/Target/LoongArch/InstSizes.cpp b/llvm/unittests/Target/LoongArch/InstSizes.cpp index da78012e20254..2a0e9e20f7243 100644 --- a/llvm/unittests/Target/LoongArch/InstSizes.cpp +++ b/llvm/unittests/Target/LoongArch/InstSizes.cpp @@ -140,3 +140,18 @@ TEST(InstSizes, AtomicPseudo) { EXPECT_EQ(44u, II.getInstSizeInBytes(*I)); }); } + +TEST(InstSizes, StatePoint) { + std::unique_ptr TM = createTargetMachine(); + std::unique_ptr II = createInstrInfo(TM.get()); + + runChecks( + TM.get(), II.get(), " declare zeroext i1 @return_i1()\n", + // clang-format off + " STATEPOINT 0, 0, 0, target-flags(loongarch-call-plt) @return_i1, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, 2, 0, implicit-def $r3, implicit-def $r4\n", + // clang-format on + [](LoongArchInstrInfo &II, MachineFunction &MF) { + auto I = MF.begin()->begin(); + EXPECT_EQ(4u, II.getInstSizeInBytes(*I)); + }); +}