Skip to content

Backport ARM64EC variadic args fixes to LLVM 18 #81800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 39 additions & 16 deletions llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,

namespace {

// Identifies which kind of thunk a signature is being computed for. It is
// passed to getThunkType/getThunkArgTypes in place of a plain bool: Entry
// selects the "$ientry_thunk$cdecl$" name prefix (all other kinds use
// "$iexit_thunk$cdecl$"), and only Exit adds the called-function pointer as a
// leading parameter of the Arm64 signature.
enum class ThunkType { GuestExit, Entry, Exit };

class AArch64Arm64ECCallLowering : public ModulePass {
public:
static char ID;
Expand All @@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
Type *I64Ty;
Type *VoidTy;

void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk,
void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out, FunctionType *&Arm64Ty,
FunctionType *&X64Ty);
void getThunkRetType(FunctionType *FT, AttributeList AttrList,
raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
Expand All @@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {

void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
AttributeList AttrList,
bool EntryThunk, raw_ostream &Out,
ThunkType TT, raw_ostream &Out,
FunctionType *&Arm64Ty,
FunctionType *&X64Ty) {
Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$");
Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
: "$iexit_thunk$cdecl$");

Type *Arm64RetTy;
Type *X64RetTy;
Expand All @@ -102,23 +105,25 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,

// The first argument to a thunk is the called function, stored in x9.
// For exit thunks, we pass the called function down to the emulator;
// for entry thunks, we just call the Arm64 function directly.
if (!EntryThunk)
// for entry/guest exit thunks, we just call the Arm64 function directly.
if (TT == ThunkType::Exit)
Arm64ArgTypes.push_back(PtrTy);
X64ArgTypes.push_back(PtrTy);

bool HasSretPtr = false;
getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
X64ArgTypes, HasSretPtr);

getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr);
getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
HasSretPtr);

Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);

X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
}

void AArch64Arm64ECCallLowering::getThunkArgTypes(
FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {

Expand Down Expand Up @@ -156,9 +161,11 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
X64ArgTypes.push_back(PtrTy);
// x5
Arm64ArgTypes.push_back(I64Ty);
// FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
// have proper isel for varargs
X64ArgTypes.push_back(I64Ty);
if (TT != ThunkType::Entry) {
// FIXME: x5 isn't actually used by the x64 side; revisit once we
// have proper isel for varargs
X64ArgTypes.push_back(I64Ty);
}
return;
}

Expand Down Expand Up @@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
SmallString<256> ExitThunkName;
llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
FunctionType *Arm64Ty, *X64Ty;
getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty,
X64Ty);
getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(ExitThunkName))
return F;

Expand Down Expand Up @@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
SmallString<256> EntryThunkName;
llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
FunctionType *Arm64Ty, *X64Ty;
getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
EntryThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(EntryThunkName))
return F;
Expand All @@ -465,10 +471,11 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {

bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy();
unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1;
unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size();

// Translate arguments to call.
SmallVector<Value *> Args;
for (unsigned i = ThunkArgOffset, e = Thunk->arg_size(); i != e; ++i) {
for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) {
Value *Arg = Thunk->getArg(i);
Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset);
if (ArgTy->isArrayTy() || ArgTy->isStructTy() ||
Expand All @@ -485,6 +492,22 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Args.push_back(Arg);
}

if (F->isVarArg()) {
// The 5th argument to variadic entry thunks is used to model the x64 sp,
// which is passed to the thunk in x4. This can be passed to the callee as
// the variadic argument start address after skipping over the 32-byte
// shadow store.

// The EC thunk CC will assign any argument marked as InReg to x4.
Thunk->addParamAttr(5, Attribute::InReg);
Value *Arg = Thunk->getArg(5);
Arg = IRB.CreatePtrAdd(Arg, IRB.getInt64(0x20));
Args.push_back(Arg);

// Pass in a zero variadic argument size (in x5).
Args.push_back(IRB.getInt64(0));
}

// Call the function passed to the thunk.
Value *Callee = Thunk->getArg(0);
Callee = IRB.CreateBitCast(Callee, PtrTy);
Expand Down Expand Up @@ -518,7 +541,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
llvm::raw_null_ostream NullThunkName;
FunctionType *Arm64Ty, *X64Ty;
getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
NullThunkName, Arm64Ty, X64Ty);
auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
assert(MangledName && "Can't guest exit to function that's already native");
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AArch64/AArch64CallingConvention.td
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,9 @@ def CC_AArch64_Arm64EC_VarArg : CallingConv<[
// address is passed in X9.
let Entry = 1 in
def CC_AArch64_Arm64EC_Thunk : CallingConv<[
// ARM64EC-specific: the InReg attribute can be used to access the x64 sp passed into entry thunks in x4 from the IR.
CCIfInReg<CCIfType<[i64], CCAssignToReg<[X4]>>>,

// Byval aggregates are passed by pointer
CCIfByVal<CCPassIndirect<i64>>,

Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}

if (IsVarArg && Subtarget->isWindowsArm64EC()) {
SDValue ParamPtr = StackPtr;
if (IsTailCall) {
// Create a dummy object at the top of the stack that can be used to get
// the SP after the epilogue.
int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
ParamPtr = DAG.getFrameIndex(FI, PtrVT);
}

// For vararg calls, the Arm64EC ABI requires values in x4 and x5
// describing the argument list. x4 contains the address of the
// first stack parameter. x5 contains the size in bytes of all parameters
// passed on the stack.
RegsToPass.emplace_back(AArch64::X4, StackPtr);
RegsToPass.emplace_back(AArch64::X4, ParamPtr);
RegsToPass.emplace_back(AArch64::X5,
DAG.getConstant(NumBytes, DL, MVT::i64));
}
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ define void @has_varargs(...) nounwind {
; CHECK-NEXT: add x29, sp, #160
; CHECK-NEXT: .seh_add_fp 160
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: ldp x8, x5, [x4, #32]
; CHECK-NEXT: mov x4, x8
; CHECK-NEXT: add x4, x4, #32
; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: blr x9
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret]
Expand Down
37 changes: 37 additions & 0 deletions llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
ret void
}

; Exercises a regular variadic call followed by a variadic tail call to the
; same callee. Per the CHECK lines below, the tail call is expected to set x4
; to sp + 48 (the value sp takes once the 48-byte frame is popped by
; "add sp, sp, #48") and x5 to zero before branching to the callee.
define void @varargs_caller_tail() nounwind {
; CHECK-LABEL: varargs_caller_tail:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #48
; CHECK-NEXT: mov x4, sp
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
; CHECK-NEXT: mov w1, #2 // =0x2
; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
; CHECK-NEXT: mov w3, #4 // =0x4
; CHECK-NEXT: mov w5, #16 // =0x10
; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT: stp x9, x8, [sp]
; CHECK-NEXT: str xzr, [sp, #16]
; CHECK-NEXT: .weak_anti_dep varargs_callee
; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
; CHECK-NEXT: bl "#varargs_callee"
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: add x4, sp, #48
; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
; CHECK-NEXT: mov w1, #4 // =0x4
; CHECK-NEXT: mov w2, #3 // =0x3
; CHECK-NEXT: mov w3, #2 // =0x2
; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: .weak_anti_dep varargs_callee
; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
; CHECK-NEXT:.set "#varargs_callee", varargs_callee@WEAKREF
; CHECK-NEXT: b "#varargs_callee"
call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
ret void
}

declare void @llvm.va_start(ptr)
8 changes: 8 additions & 0 deletions llvm/test/CodeGen/AArch64/vararg-tallcall.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s

Expand Down Expand Up @@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
; CHECK: ldr x9, [x9]
; CHECK: mov v0.16b, v16.16b
; CHECK: br x9
; CHECK-EC: mov v7.16b, v0.16b
; CHECK-EC: ldr x9, [x0]
; CHECK-EC: ldr x11, [x9]
; CHECK-EC: mov v0.16b, v7.16b
; CHECK-EC: add x4, sp, #64
; CHECK-EC: add sp, sp, #64
; CHECK-EC: br x11