-
Notifications
You must be signed in to change notification settings - Fork 13.4k
PR for llvm/llvm-project#80752 #80754
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ARM64EC varargs calls expect that x4 = sp at entry. Special handling is needed to ensure this with tail calls, since the tail call occurs after the epilogue while the x4 write happens before it. I tried going through AArch64MachineFrameLowering for this, hoping to avoid creating the dummy object, but this was the best I could do: the stack info that it uses isn't populated at this stage, and CreateFixedObject also explicitly forbids 0-sized objects. (cherry picked from commit c761b4a)
llvm#80595) ISel handles filling in x4/x5 when calling variadic functions, as they don't correspond to the 5th/6th X64 arguments but rather to the end of the shadow space on the stack and the size in bytes of all stack parameters (ignored and written as 0 for calls from entry thunks). Will PR a follow-up with ISel handling after this is merged. (cherry picked from commit 8f07014)
@efriedma-quic @cjacek What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-backend-aarch64 Author: None (llvmbot) Changes: resolves llvm/llvm-project#80752. Full diff: https://github.com/llvm/llvm-project/pull/80754.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 11248bb7aef31..91b4f18c73c93 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
namespace {
+enum class ThunkType { GuestExit, Entry, Exit };
+
class AArch64Arm64ECCallLowering : public ModulePass {
public:
static char ID;
@@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
Type *I64Ty;
Type *VoidTy;
- void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk,
+ void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out, FunctionType *&Arm64Ty,
FunctionType *&X64Ty);
void getThunkRetType(FunctionType *FT, AttributeList AttrList,
raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
- void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
+ void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
@@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {
void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
AttributeList AttrList,
- bool EntryThunk, raw_ostream &Out,
+ ThunkType TT, raw_ostream &Out,
FunctionType *&Arm64Ty,
FunctionType *&X64Ty) {
- Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$");
+ Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
+ : "$iexit_thunk$cdecl$");
Type *Arm64RetTy;
Type *X64RetTy;
@@ -102,8 +105,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
// The first argument to a thunk is the called function, stored in x9.
// For exit thunks, we pass the called function down to the emulator;
- // for entry thunks, we just call the Arm64 function directly.
- if (!EntryThunk)
+ // for entry/guest exit thunks, we just call the Arm64 function directly.
+ if (TT == ThunkType::Exit)
Arm64ArgTypes.push_back(PtrTy);
X64ArgTypes.push_back(PtrTy);
@@ -111,14 +114,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
X64ArgTypes, HasSretPtr);
- getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr);
+ getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
+ HasSretPtr);
- Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
+ Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes,
+ TT == ThunkType::Entry && FT->isVarArg());
X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
}
void AArch64Arm64ECCallLowering::getThunkArgTypes(
- FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
+ FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
@@ -151,14 +156,16 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
X64ArgTypes.push_back(I64Ty);
}
- // x4
- Arm64ArgTypes.push_back(PtrTy);
- X64ArgTypes.push_back(PtrTy);
- // x5
- Arm64ArgTypes.push_back(I64Ty);
- // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
- // have proper isel for varargs
- X64ArgTypes.push_back(I64Ty);
+ if (TT != ThunkType::Entry) {
+ // x4
+ Arm64ArgTypes.push_back(PtrTy);
+ X64ArgTypes.push_back(PtrTy);
+ // x5
+ Arm64ArgTypes.push_back(I64Ty);
+ // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
+ // have proper isel for varargs
+ X64ArgTypes.push_back(I64Ty);
+ }
return;
}
@@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
SmallString<256> ExitThunkName;
llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty,
- X64Ty);
+ getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(ExitThunkName))
return F;
@@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
SmallString<256> EntryThunkName;
llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+ getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
EntryThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(EntryThunkName))
return F;
@@ -518,7 +524,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
llvm::raw_null_ostream NullThunkName;
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+ getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
NullThunkName, Arm64Ty, X64Ty);
auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
assert(MangledName && "Can't guest exit to function that's already native");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e97f5e3220148..957b556edaf31 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+ SDValue ParamPtr = StackPtr;
+ if (IsTailCall) {
+ // Create a dummy object at the top of the stack that can be used to get
+ // the SP after the epilogue
+ int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
+ ParamPtr = DAG.getFrameIndex(FI, PtrVT);
+ }
+
// For vararg calls, the Arm64EC ABI requires values in x4 and x5
// describing the argument list. x4 contains the address of the
// first stack parameter. x5 contains the size in bytes of all parameters
// passed on the stack.
- RegsToPass.emplace_back(AArch64::X4, StackPtr);
+ RegsToPass.emplace_back(AArch64::X4, ParamPtr);
RegsToPass.emplace_back(AArch64::X5,
DAG.getConstant(NumBytes, DL, MVT::i64));
}
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 5c56f51e1ca55..0083818def151 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -147,8 +147,8 @@ define void @has_varargs(...) nounwind {
; CHECK-NEXT: add x29, sp, #160
; CHECK-NEXT: .seh_add_fp 160
; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: ldp x8, x5, [x4, #32]
-; CHECK-NEXT: mov x4, x8
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: blr x9
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret]
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
index dc16b3a1a0f27..844fc52ddade6 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
ret void
}
+define void @varargs_caller_tail() nounwind {
+; CHECK-LABEL: varargs_caller_tail:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; CHECK-NEXT: mov w3, #4 // =0x4
+; CHECK-NEXT: mov w5, #16 // =0x10
+; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: str xzr, [sp, #16]
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: bl "#varargs_callee"
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add x4, sp, #48
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #4 // =0x4
+; CHECK-NEXT: mov w2, #3 // =0x3
+; CHECK-NEXT: mov w3, #2 // =0x2
+; CHECK-NEXT: mov x5, xzr
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: b "#varargs_callee"
+ call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+ tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
+ ret void
+}
declare void @llvm.va_start(ptr)
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 2d6db1642247d..812837639196e 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
@@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
; CHECK: ldr x9, [x9]
; CHECK: mov v0.16b, v16.16b
; CHECK: br x9
+; CHECK-EC: mov v7.16b, v0.16b
+; CHECK-EC: ldr x9, [x0]
+; CHECK-EC: ldr x11, [x9]
+; CHECK-EC: mov v0.16b, v7.16b
+; CHECK-EC: add x4, sp, #64
+; CHECK-EC: add sp, sp, #64
+; CHECK-EC: br x11
|
Superseded by #81800 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
resolves #80752