-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Backport ARM64EC variadic args fixes to LLVM 18 #81800
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Daniel Paoliello (dpaoliello) ChangesBackports two fixes for ARM64EC variadic args:
Full diff: https://github.com/llvm/llvm-project/pull/81800.diff 5 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 11248bb7aef31f..91b4f18c73c935 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
namespace {
+enum class ThunkType { GuestExit, Entry, Exit };
+
class AArch64Arm64ECCallLowering : public ModulePass {
public:
static char ID;
@@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
Type *I64Ty;
Type *VoidTy;
- void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk,
+ void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out, FunctionType *&Arm64Ty,
FunctionType *&X64Ty);
void getThunkRetType(FunctionType *FT, AttributeList AttrList,
raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
- void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
+ void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
@@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {
void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
AttributeList AttrList,
- bool EntryThunk, raw_ostream &Out,
+ ThunkType TT, raw_ostream &Out,
FunctionType *&Arm64Ty,
FunctionType *&X64Ty) {
- Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$");
+ Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
+ : "$iexit_thunk$cdecl$");
Type *Arm64RetTy;
Type *X64RetTy;
@@ -102,8 +105,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
// The first argument to a thunk is the called function, stored in x9.
// For exit thunks, we pass the called function down to the emulator;
- // for entry thunks, we just call the Arm64 function directly.
- if (!EntryThunk)
+ // for entry/guest exit thunks, we just call the Arm64 function directly.
+ if (TT == ThunkType::Exit)
Arm64ArgTypes.push_back(PtrTy);
X64ArgTypes.push_back(PtrTy);
@@ -111,14 +114,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
X64ArgTypes, HasSretPtr);
- getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr);
+ getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
+ HasSretPtr);
- Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
+ Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes,
+ TT == ThunkType::Entry && FT->isVarArg());
X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
}
void AArch64Arm64ECCallLowering::getThunkArgTypes(
- FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
+ FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
@@ -151,14 +156,16 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
X64ArgTypes.push_back(I64Ty);
}
- // x4
- Arm64ArgTypes.push_back(PtrTy);
- X64ArgTypes.push_back(PtrTy);
- // x5
- Arm64ArgTypes.push_back(I64Ty);
- // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
- // have proper isel for varargs
- X64ArgTypes.push_back(I64Ty);
+ if (TT != ThunkType::Entry) {
+ // x4
+ Arm64ArgTypes.push_back(PtrTy);
+ X64ArgTypes.push_back(PtrTy);
+ // x5
+ Arm64ArgTypes.push_back(I64Ty);
+ // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
+ // have proper isel for varargs
+ X64ArgTypes.push_back(I64Ty);
+ }
return;
}
@@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
SmallString<256> ExitThunkName;
llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty,
- X64Ty);
+ getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(ExitThunkName))
return F;
@@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
SmallString<256> EntryThunkName;
llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+ getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
EntryThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(EntryThunkName))
return F;
@@ -518,7 +524,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
llvm::raw_null_ostream NullThunkName;
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+ getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
NullThunkName, Arm64Ty, X64Ty);
auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
assert(MangledName && "Can't guest exit to function that's already native");
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e97f5e32201488..957b556edaf312 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+ SDValue ParamPtr = StackPtr;
+ if (IsTailCall) {
+ // Create a dummy object at the top of the stack that can be used to get
+ // the SP after the epilogue
+ int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
+ ParamPtr = DAG.getFrameIndex(FI, PtrVT);
+ }
+
// For vararg calls, the Arm64EC ABI requires values in x4 and x5
// describing the argument list. x4 contains the address of the
// first stack parameter. x5 contains the size in bytes of all parameters
// passed on the stack.
- RegsToPass.emplace_back(AArch64::X4, StackPtr);
+ RegsToPass.emplace_back(AArch64::X4, ParamPtr);
RegsToPass.emplace_back(AArch64::X5,
DAG.getConstant(NumBytes, DL, MVT::i64));
}
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 5c56f51e1ca554..0083818def1514 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -147,8 +147,8 @@ define void @has_varargs(...) nounwind {
; CHECK-NEXT: add x29, sp, #160
; CHECK-NEXT: .seh_add_fp 160
; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: ldp x8, x5, [x4, #32]
-; CHECK-NEXT: mov x4, x8
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: blr x9
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret]
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
index dc16b3a1a0f270..844fc52ddade63 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
ret void
}
+define void @varargs_caller_tail() nounwind {
+; CHECK-LABEL: varargs_caller_tail:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; CHECK-NEXT: mov w3, #4 // =0x4
+; CHECK-NEXT: mov w5, #16 // =0x10
+; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: str xzr, [sp, #16]
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: bl "#varargs_callee"
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add x4, sp, #48
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #4 // =0x4
+; CHECK-NEXT: mov w2, #3 // =0x3
+; CHECK-NEXT: mov w3, #2 // =0x2
+; CHECK-NEXT: mov x5, xzr
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
+; CHECK-NEXT: b "#varargs_callee"
+ call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+ tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
+ ret void
+}
declare void @llvm.va_start(ptr)
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 2d6db1642247d7..812837639196e6 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
@@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
; CHECK: ldr x9, [x9]
; CHECK: mov v0.16b, v16.16b
; CHECK: br x9
+; CHECK-EC: mov v7.16b, v0.16b
+; CHECK-EC: ldr x9, [x0]
+; CHECK-EC: ldr x11, [x9]
+; CHECK-EC: mov v0.16b, v7.16b
+; CHECK-EC: add x4, sp, #64
+; CHECK-EC: add sp, sp, #64
+; CHECK-EC: br x11
|
dce4933
to
e08732e
Compare
@efriedma-quic I think this is ready to land, the MacOS test failure is a known issue. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks fine, but I think merging might need to wait for a point release? CC @tstellar .
@efriedma-quic 18.1.0 was just released, so I think now's a good time to merge this in for 18.1.1 |
e08732e
to
e90c108
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
4715388
to
ec6b7e8
Compare
ARM64EC varargs calls expect that x4 = sp at entry, special handling is needed to ensure this with tail calls since they occur after the epilogue and the x4 write happens before. I tried going through AArch64MachineFrameLowering for this, hoping to avoid creating the dummy object but this was the best I could do since the stack info that uses isn't populated at this stage, CreateFixedObject also explicitly forbids 0 sized objects.
llvm#80595) ISel handles filling in x4/x5 when calling variadic functions as they don't correspond to the 5th/6th X64 arguments but rather to the end of the shadow space on the stack and the size in bytes of all stack parameters (ignored and written as 0 for calls from entry thunks). Will PR a follow up with ISel handling after this is merged.
…ls (llvm#80994) When in an entry thunk the x64 SP is passed in x4 but this cannot be directly passed through since x64 varargs calls have a 32 byte shadow store at SP followed by the in-stack parameters. ARM64EC varargs calls on the other hand expect x4 to point to the first in-stack parameter.
ec6b7e8
to
89d5432
Compare
@tstellar can we please get this merged into the v18 release branch? |
Hi @dpaoliello (or anyone else). If you would like to add a note about this fix in the release notes (completely optional). Please reply to this comment with a one or two sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backports fixes for ARM64EC variadic args: