From 549e4ea5b292e558e085d881abd4c93f29352029 Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Sun, 21 Jan 2024 00:25:34 -0800
Subject: [PATCH] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  | 13 ++-
 llvm/lib/Target/AArch64/AArch64FastISel.cpp   |  7 ++
 .../Target/AArch64/AArch64ISelLowering.cpp    |  9 ++-
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  | 11 +--
 .../AArch64/GISel/AArch64CallLowering.cpp     | 13 ++-
 .../GISel/AArch64InstructionSelector.cpp      | 16 +++-
 .../AArch64/GISel/AArch64LegalizerInfo.cpp    |  3 +
 llvm/test/CodeGen/AArch64/nonlazybind.ll      | 81 +++++++++----------
 8 files changed, 93 insertions(+), 60 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ccd9b13d730b6..d3484e5229e70 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -144,9 +144,16 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   // Try looking through a bitcast from one function type to another.
   // Commonly happens with calls to objc_msgSend().
   const Value *CalleeV = CB.getCalledOperand()->stripPointerCasts();
-  if (const Function *F = dyn_cast<Function>(CalleeV))
-    Info.Callee = MachineOperand::CreateGA(F, 0);
-  else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
+  if (const Function *F = dyn_cast<Function>(CalleeV)) {
+    if (F->hasFnAttribute(Attribute::NonLazyBind)) {
+      auto Reg =
+          MRI.createGenericVirtualRegister(getLLTForType(*F->getType(), DL));
+      MIRBuilder.buildGlobalValue(Reg, F);
+      Info.Callee = MachineOperand::CreateReg(Reg, false);
+    } else {
+      Info.Callee = MachineOperand::CreateGA(F, 0);
+    }
+  } else if (isa<GlobalIFunc>(CalleeV) || isa<GlobalAlias>(CalleeV)) {
     // IR IFuncs and Aliases can't be forward declared (only defined), so the
     // callee must be in the same TU and therefore we can direct-call it without
     // worrying about it being out of range.
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index e98f6c4984a75..93d6024f34c09 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -3202,6 +3202,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   if (Callee && !computeCallAddress(Callee, Addr))
     return false;
 
+  // MO_GOT is not handled. Intrinsic calls compiled with -fno-plt do not
+  // carry the nonlazybind attribute, so check "RtLibUseGOT" instead.
+  if ((Subtarget->classifyGlobalFunctionReference(Addr.getGlobalValue(), TM) !=
+       AArch64II::MO_NO_FLAG) ||
+      MF->getFunction().getParent()->getRtLibUseGOT())
+    return false;
+
   // The weak function target may be zero; in that case we must use indirect
   // addressing via a stub on windows as it may be out of range for a
   // PC-relative jump.
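Illustration (not part of the patch; the callee name mirrors the test below): with the two hunks above, GlobalISel lowers a call to a nonlazybind function as a G_GLOBAL_VALUE that materializes the callee address, producing a GOT load plus an indirect call instead of a direct bl, while FastISel now returns false and falls back whenever a GOT-indirect callee (or an -fno-plt libcall) would be required. A minimal IR sketch with the AArch64 ELF output expected under the small code model:

declare void @external() nonlazybind

define void @caller() {
  ; Expected with this patch:
  ;   adrp x8, :got:external
  ;   ldr  x8, [x8, :got_lo12:external]
  ;   blr  x8
  call void @external()
  ret void
}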
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 96ea692d03f56..56de890c78dec 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7969,13 +7969,14 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
       Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
     }
   } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
-    if (getTargetMachine().getCodeModel() == CodeModel::Large &&
-        Subtarget->isTargetMachO()) {
-      const char *Sym = S->getSymbol();
+    bool UseGot = (getTargetMachine().getCodeModel() == CodeModel::Large &&
+                   Subtarget->isTargetMachO()) ||
+                  MF.getFunction().getParent()->getRtLibUseGOT();
+    const char *Sym = S->getSymbol();
+    if (UseGot) {
       Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
       Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
     } else {
-      const char *Sym = S->getSymbol();
       Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
     }
   }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index cf57d950ae8d7..c4c6827313b5e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -43,10 +43,10 @@ static cl::opt<bool>
 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                                                      "an address is ignored"),
                          cl::init(false), cl::Hidden);
-static cl::opt<bool>
-    UseNonLazyBind("aarch64-enable-nonlazybind",
-                   cl::desc("Call nonlazybind functions via direct GOT load"),
-                   cl::init(false), cl::Hidden);
+static cl::opt<bool> MachOUseNonLazyBind(
+    "aarch64-macho-enable-nonlazybind",
+    cl::desc("Call nonlazybind functions via direct GOT load for Mach-O"),
+    cl::Hidden);
 
 static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
                            cl::desc("Enable the use of AA during codegen."));
@@ -434,7 +434,8 @@ unsigned AArch64Subtarget::classifyGlobalFunctionReference(
 
   // NonLazyBind goes via GOT unless we know it's available locally.
   auto *F = dyn_cast<Function>(GV);
-  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
+  if ((!isTargetMachO() || MachOUseNonLazyBind) && F &&
+      F->hasFnAttribute(Attribute::NonLazyBind) &&
       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
     return AArch64II::MO_GOT;
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 84057ea8d2214..773eadbf34de3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1273,8 +1273,19 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
       !Subtarget.noBTIAtReturnTwice() &&
       MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
     Opc = AArch64::BLR_BTI;
-  else
+  else {
+    // For an intrinsic call (e.g. memset), use the GOT if the "RtLibUseGOT"
+    // module flag (-fno-plt) is set.
+    if (Info.Callee.isSymbol() && F.getParent()->getRtLibUseGOT()) {
+      auto Reg =
+          MRI.createGenericVirtualRegister(getLLTForType(*F.getType(), DL));
+      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_GLOBAL_VALUE);
+      DstOp(Reg).addDefToMIB(MRI, MIB);
+      MIB.addExternalSymbol(Info.Callee.getSymbolName(), AArch64II::MO_GOT);
+      Info.Callee = MachineOperand::CreateReg(Reg, false);
+    }
     Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+  }
 
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   unsigned CalleeOpNo = 0;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 8344e79f78e1e..e60db260e3ef1 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2841,11 +2841,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
   }
 
   case TargetOpcode::G_GLOBAL_VALUE: {
-    auto GV = I.getOperand(1).getGlobal();
-    if (GV->isThreadLocal())
-      return selectTLSGlobalValue(I, MRI);
+    const GlobalValue *GV = nullptr;
+    unsigned OpFlags;
+    if (I.getOperand(1).isSymbol()) {
+      OpFlags = I.getOperand(1).getTargetFlags();
+      // Currently only used by "RtLibUseGOT".
+      assert(OpFlags == AArch64II::MO_GOT);
+    } else {
+      GV = I.getOperand(1).getGlobal();
+      if (GV->isThreadLocal())
+        return selectTLSGlobalValue(I, MRI);
+      OpFlags = STI.ClassifyGlobalReference(GV, TM);
+    }
 
-    unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
     if (OpFlags & AArch64II::MO_GOT) {
       I.setDesc(TII.get(AArch64::LOADgot));
       I.getOperand(1).setTargetFlags(OpFlags);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b561cb12c93a1..83137949d0f24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1314,6 +1314,9 @@ bool AArch64LegalizerInfo::legalizeSmallCMGlobalValue(
   // By splitting this here, we can optimize accesses in the small code model by
   // folding in the G_ADD_LOW into the load/store offset.
   auto &GlobalOp = MI.getOperand(1);
+  // Don't modify an intrinsic call.
+  if (GlobalOp.isSymbol())
+    return true;
   const auto* GV = GlobalOp.getGlobal();
   if (GV->isThreadLocal())
     return true; // Don't want to modify TLS vars.
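How the GlobalISel pieces above fit together (an illustrative sketch; the expected instructions are taken from the test below): when a module carries the "RtLibUseGOT" flag, which clang sets under -fno-plt, the libcall emitted for an intrinsic such as llvm.memset becomes a G_GLOBAL_VALUE carrying an external symbol with MO_GOT; the instruction selector turns that into LOADgot, and the legalizer now skips such symbol operands instead of splitting them into ADRP + G_ADD_LOW:

define void @zero(ptr %p) {
  ; Expected memset call sequence (ELF):
  ;   adrp x8, :got:memset
  ;   ldr  x8, [x8, :got_lo12:memset]
  ;   blr  x8
  call void @llvm.memset.p0.i64(ptr align 1 %p, i8 0, i64 1000, i1 false)
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)

!llvm.module.flags = !{!0}
!0 = !{i32 7, !"RtLibUseGOT", i32 1}

Note the option rename in AArch64Subtarget.cpp: on ELF, nonlazybind functions now always go through the GOT, so the flag only gates the Mach-O behavior under its new name -aarch64-macho-enable-nonlazybind.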
diff --git a/llvm/test/CodeGen/AArch64/nonlazybind.ll b/llvm/test/CodeGen/AArch64/nonlazybind.ll
index 669a8ee04b249..f5bb3a4ecbc9a 100644
--- a/llvm/test/CodeGen/AArch64/nonlazybind.ll
+++ b/llvm/test/CodeGen/AArch64/nonlazybind.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
+; RUN: llc -mtriple=aarch64-apple-ios %s -o - -aarch64-macho-enable-nonlazybind | FileCheck %s --check-prefix=MACHO
 ; RUN: llc -mtriple=aarch64-apple-ios %s -o - | FileCheck %s --check-prefix=MACHO-NORMAL
 ; RUN: llc -mtriple=aarch64 -fast-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-FI
 ; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=ELF,ELF-GI
@@ -19,13 +19,18 @@ define void @test_laziness(ptr %a) nounwind {
 ; MACHO-NEXT: Lloh1:
 ; MACHO-NEXT: ldr x8, [x8, _external@GOTPAGEOFF]
 ; MACHO-NEXT: blr x8
+; MACHO-NEXT: Lloh2:
+; MACHO-NEXT: adrp x8, _memset@GOTPAGE
 ; MACHO-NEXT: mov x0, x19
 ; MACHO-NEXT: mov w1, #1 ; =0x1
+; MACHO-NEXT: Lloh3:
+; MACHO-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
 ; MACHO-NEXT: mov w2, #1000 ; =0x3e8
-; MACHO-NEXT: bl _memset
+; MACHO-NEXT: blr x8
 ; MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; MACHO-NEXT: ret
+; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
 ; MACHO-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
 ;
 ; MACHO-NORMAL-LABEL: test_laziness:
@@ -34,50 +39,34 @@ define void @test_laziness(ptr %a) nounwind {
 ; MACHO-NORMAL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
 ; MACHO-NORMAL-NEXT: mov x19, x0
 ; MACHO-NORMAL-NEXT: bl _external
+; MACHO-NORMAL-NEXT: Lloh0:
+; MACHO-NORMAL-NEXT: adrp x8, _memset@GOTPAGE
 ; MACHO-NORMAL-NEXT: mov x0, x19
 ; MACHO-NORMAL-NEXT: mov w1, #1 ; =0x1
+; MACHO-NORMAL-NEXT: Lloh1:
+; MACHO-NORMAL-NEXT: ldr x8, [x8, _memset@GOTPAGEOFF]
 ; MACHO-NORMAL-NEXT: mov w2, #1000 ; =0x3e8
-; MACHO-NORMAL-NEXT: bl _memset
+; MACHO-NORMAL-NEXT: blr x8
 ; MACHO-NORMAL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
 ; MACHO-NORMAL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
 ; MACHO-NORMAL-NEXT: ret
+; MACHO-NORMAL-NEXT: .loh AdrpLdrGot Lloh0, Lloh1
 ;
-; ELF-FI-LABEL: test_laziness:
-; ELF-FI: // %bb.0:
-; ELF-FI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-FI-NEXT: mov x19, x0
-; ELF-FI-NEXT: bl external
-; ELF-FI-NEXT: mov w8, #1 // =0x1
-; ELF-FI-NEXT: mov x0, x19
-; ELF-FI-NEXT: mov x2, #1000 // =0x3e8
-; ELF-FI-NEXT: uxtb w1, w8
-; ELF-FI-NEXT: bl memset
-; ELF-FI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-FI-NEXT: ret
-;
-; ELF-GI-LABEL: test_laziness:
-; ELF-GI: // %bb.0:
-; ELF-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-GI-NEXT: mov x19, x0
-; ELF-GI-NEXT: bl external
-; ELF-GI-NEXT: mov x0, x19
-; ELF-GI-NEXT: mov w1, #1 // =0x1
-; ELF-GI-NEXT: mov w2, #1000 // =0x3e8
-; ELF-GI-NEXT: bl memset
-; ELF-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-GI-NEXT: ret
-;
-; ELF-SDAG-LABEL: test_laziness:
-; ELF-SDAG: // %bb.0:
-; ELF-SDAG-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
-; ELF-SDAG-NEXT: mov x19, x0
-; ELF-SDAG-NEXT: bl external
-; ELF-SDAG-NEXT: mov x0, x19
-; ELF-SDAG-NEXT: mov w1, #1 // =0x1
-; ELF-SDAG-NEXT: mov w2, #1000 // =0x3e8
-; ELF-SDAG-NEXT: bl memset
-; ELF-SDAG-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
-; ELF-SDAG-NEXT: ret
+; ELF-LABEL: test_laziness:
+; ELF: // %bb.0:
+; ELF-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; ELF-NEXT: adrp x8, :got:external
+; ELF-NEXT: mov x19, x0
+; ELF-NEXT: ldr x8, [x8, :got_lo12:external]
+; ELF-NEXT: blr x8
+; ELF-NEXT: adrp x8, :got:memset
+; ELF-NEXT: mov x0, x19
+; ELF-NEXT: mov w1, #1 // =0x1
+; ELF-NEXT: ldr x8, [x8, :got_lo12:memset]
+; ELF-NEXT: mov w2, #1000 // =0x3e8
+; ELF-NEXT: blr x8
+; ELF-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; ELF-NEXT: ret
 call void @external()
 call void @llvm.memset.p0.i64(ptr align 1 %a, i8 1, i64 1000, i1 false)
 ret void
@@ -86,12 +75,12 @@ define void @test_laziness_tail() nounwind {
 ; MACHO-LABEL: test_laziness_tail:
 ; MACHO: ; %bb.0:
-; MACHO-NEXT: Lloh2:
+; MACHO-NEXT: Lloh4:
 ; MACHO-NEXT: adrp x0, _external@GOTPAGE
-; MACHO-NEXT: Lloh3:
+; MACHO-NEXT: Lloh5:
 ; MACHO-NEXT: ldr x0, [x0, _external@GOTPAGEOFF]
 ; MACHO-NEXT: br x0
-; MACHO-NEXT: .loh AdrpLdrGot Lloh2, Lloh3
+; MACHO-NEXT: .loh AdrpLdrGot Lloh4, Lloh5
 ;
 ; MACHO-NORMAL-LABEL: test_laziness_tail:
 ; MACHO-NORMAL: ; %bb.0:
@@ -99,7 +88,9 @@ define void @test_laziness_tail() nounwind {
 ;
 ; ELF-LABEL: test_laziness_tail:
 ; ELF: // %bb.0:
-; ELF-NEXT: b external
+; ELF-NEXT: adrp x0, :got:external
+; ELF-NEXT: ldr x0, [x0, :got_lo12:external]
+; ELF-NEXT: br x0
 tail call void @external()
 ret void
 }
@@ -108,3 +99,7 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 7, !"RtLibUseGOT", i32 1}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; ELF-FI: {{.*}}
+; ELF-GI: {{.*}}
+; ELF-SDAG: {{.*}}
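Tail calls take the same GOT-indirect path, as the test_laziness_tail checks above show: the ELF output becomes a GOT load plus br rather than a direct b. A standalone sketch of that case (illustrative; assumes an external nonlazybind declaration as in the test):

declare void @external() nonlazybind

define void @tail_caller() {
  ; Expected (ELF):
  ;   adrp x0, :got:external
  ;   ldr  x0, [x0, :got_lo12:external]
  ;   br   x0
  tail call void @external()
  ret void
}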