Skip to content

Commit 9675cef

Browse files
committed
[x86_64][windows][swift] do not use Swift async extended frame for windows x86_64
targets that use windows 64 prologue Windows x86_64 stack frame layout is currently not compatible with Swift's async extended frame, which reserves the slot right below RBP (RBP-8) for the async context pointer, as it doesn't account for the fact that a stack object in a win64 frame can be allocated at the same location. This can cause issues at runtime, for instance, Swift's TCA test code has functions that fail because of this issue, as they spill a value to that slack slot, which then gets overwritten by a store into address returned by the @llvm.swift.async.context.addr() intrinsic (that ends up being RBP - 8), leading to an incorrect value being used at a later point when that stack slot is being read from again. This change drops the use of async extended frame for windows x86_64 subtargets and instead uses the x32 based approach of allocating a separate stack slot for the stored async context pointer. Additionally, LLDB which is the primary consumer of the extended frame makes assumptions like checking for a saved previous frame pointer at the current frame pointer address, which is also incompatible with the windows x86_64 frame layout, as the previous frame pointer is not guaranteed to be stored at the current frame pointer address. Therefore the extended frame layout can be turned off to fix the current miscompile without introducing regression into LLDB for windows x86_64 as it already doesn't work correctly. I am still investigating what should be made for LLDB to support using an allocated stack slot to store the async frame context instead of being located at RBP - 8 for windows.
1 parent 7dd790d commit 9675cef

File tree

5 files changed

+71
-34
lines changed

5 files changed

+71
-34
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1605,6 +1605,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
16051605
[[fallthrough]];
16061606

16071607
case SwiftAsyncFramePointerMode::Always:
1608+
assert(
1609+
!IsWin64Prologue &&
1610+
"win64 prologue does not set the bit 60 in the saved frame pointer");
16081611
BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
16091612
.addUse(MachineFramePtr)
16101613
.addImm(60)
@@ -1747,6 +1750,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
17471750

17481751
if (!IsFunclet) {
17491752
if (X86FI->hasSwiftAsyncContext()) {
1753+
assert(!IsWin64Prologue &&
1754+
"win64 prologue does not store async context right below rbp");
17501755
const auto &Attrs = MF.getFunction().getAttributes();
17511756

17521757
// Before we update the live frame pointer we have to ensure there's a

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26572,6 +26572,15 @@ static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain,
2657226572
return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
2657326573
}
2657426574

26575+
bool X86::isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
26576+
const MachineFunction &MF) {
26577+
if (!Subtarget.is64Bit())
26578+
return false;
26579+
// 64-bit targets support extended Swift async frame setup,
26580+
// except for targets that use the windows 64 prologue.
26581+
return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
26582+
}
26583+
2657526584
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2657626585
SelectionDAG &DAG) {
2657726586
unsigned IntNo = Op.getConstantOperandVal(1);
@@ -26583,7 +26592,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2658326592
SDLoc dl(Op);
2658426593
auto &MF = DAG.getMachineFunction();
2658526594
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
26586-
if (Subtarget.is64Bit()) {
26595+
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF)) {
2658726596
MF.getFrameInfo().setFrameAddressIsTaken(true);
2658826597
X86FI->setHasSwiftAsyncContext(true);
2658926598
SDValue Chain = Op->getOperand(0);
@@ -26596,13 +26605,15 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
2659626605
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
2659726606
CopyRBP.getValue(1));
2659826607
} else {
26599-
// 32-bit so no special extended frame, create or reuse an existing
26600-
// stack slot.
26608+
// No special extended frame, create or reuse an existing stack slot.
26609+
int PtrSize = Subtarget.is64Bit() ? 8 : 4;
2660126610
if (!X86FI->getSwiftAsyncContextFrameIdx())
2660226611
X86FI->setSwiftAsyncContextFrameIdx(
26603-
MF.getFrameInfo().CreateStackObject(4, Align(4), false));
26612+
MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize),
26613+
false));
2660426614
SDValue Result =
26605-
DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
26615+
DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(),
26616+
PtrSize == 8 ? MVT::i64 : MVT::i32);
2660626617
// Return { result, chain }.
2660726618
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result,
2660826619
Op->getOperand(0));

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -966,6 +966,11 @@ namespace llvm {
966966
/// Check if Op is an operation that could be folded into a zero extend x86
967967
/// instruction.
968968
bool mayFoldIntoZeroExtend(SDValue Op);
969+
970+
/// True if the target supports the extended frame for async Swift
971+
/// functions.
972+
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
973+
const MachineFunction &MF);
969974
} // end namespace X86
970975

971976
//===--------------------------------------------------------------------===//

llvm/lib/Target/X86/X86ISelLoweringCall.cpp

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1813,14 +1813,17 @@ SDValue X86TargetLowering::LowerFormalArguments(
18131813
for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
18141814
if (Ins[I].Flags.isSwiftAsync()) {
18151815
auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1816-
if (Subtarget.is64Bit())
1816+
if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
18171817
X86FI->setHasSwiftAsyncContext(true);
18181818
else {
1819-
int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
1819+
int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1820+
int FI =
1821+
MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
18201822
X86FI->setSwiftAsyncContextFrameIdx(FI);
1821-
SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
1822-
DAG.getFrameIndex(FI, MVT::i32),
1823-
MachinePointerInfo::getFixedStack(MF, FI));
1823+
SDValue St = DAG.getStore(
1824+
DAG.getEntryNode(), dl, InVals[I],
1825+
DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1826+
MachinePointerInfo::getFixedStack(MF, FI));
18241827
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
18251828
}
18261829
}

llvm/test/CodeGen/X86/swift-async-win64.ll

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,12 @@ define void @simple(ptr swiftasync %context) "frame-pointer"="all" {
66
}
77

88
; CHECK64-LABEL: simple:
9-
; CHECK64: btsq $60, %rbp
109
; CHECK64: pushq %rbp
11-
; CHECK64: pushq %r14
12-
; CHECK64: leaq 8(%rsp), %rbp
13-
; [...]
14-
; CHECK64: addq $16, %rsp
10+
; CHECK64: pushq %rax
11+
; CHECK64: movq %rsp, %rbp
12+
; CHECK64: movq %r14, (%rbp)
13+
; CHECK64: addq $8, %rsp
1514
; CHECK64: popq %rbp
16-
; CHECK64: btrq $60, %rbp
1715
; CHECK64: retq
1816

1917
; CHECK32-LABEL: simple:
@@ -26,20 +24,20 @@ define void @more_csrs(ptr swiftasync %context) "frame-pointer"="all" {
2624
}
2725

2826
; CHECK64-LABEL: more_csrs:
29-
; CHECK64: btsq $60, %rbp
3027
; CHECK64: pushq %rbp
3128
; CHECK64: .seh_pushreg %rbp
32-
; CHECK64: pushq %r14
33-
; CHECK64: .seh_pushreg %r14
34-
; CHECK64: leaq 8(%rsp), %rbp
35-
; CHECK64: subq $8, %rsp
3629
; CHECK64: pushq %r15
3730
; CHECK64: .seh_pushreg %r15
31+
; CHECK64: pushq %rax
32+
; CHECK64: .seh_stackalloc 8
33+
; CHECK64: movq %rsp, %rbp
34+
; CHECK64: .seh_setframe %rbp, 0
35+
; CHECK64: .seh_endprologue
36+
; CHECK64: movq %r14, (%rbp)
3837
; [...]
38+
; CHECK64: addq $8, %rsp
3939
; CHECK64: popq %r15
40-
; CHECK64: addq $16, %rsp
4140
; CHECK64: popq %rbp
42-
; CHECK64: btrq $60, %rbp
4341
; CHECK64: retq
4442

4543
declare void @f(ptr)
@@ -51,21 +49,16 @@ define void @locals(ptr swiftasync %context) "frame-pointer"="all" {
5149
}
5250

5351
; CHECK64-LABEL: locals:
54-
; CHECK64: btsq $60, %rbp
5552
; CHECK64: pushq %rbp
5653
; CHECK64: .seh_pushreg %rbp
57-
; CHECK64: pushq %r14
58-
; CHECK64: .seh_pushreg %r14
59-
; CHECK64: leaq 8(%rsp), %rbp
60-
; CHECK64: subq $88, %rsp
54+
; CHECK64: subq $80, %rsp
55+
; CHECK64: movq %r14, -8(%rbp)
6156

6257
; CHECK64: leaq -48(%rbp), %rcx
6358
; CHECK64: callq f
6459

6560
; CHECK64: addq $80, %rsp
66-
; CHECK64: addq $16, %rsp
6761
; CHECK64: popq %rbp
68-
; CHECK64: btrq $60, %rbp
6962
; CHECK64: retq
7063

7164
define void @use_input_context(ptr swiftasync %context, ptr %ptr) "frame-pointer"="all" {
@@ -84,7 +77,7 @@ define ptr @context_in_func() "frmae-pointer"="non-leaf" {
8477
}
8578

8679
; CHECK64-LABEL: context_in_func:
87-
; CHECK64: leaq -8(%rbp), %rax
80+
; CHECK64: movq %rsp, %rax
8881

8982
; CHECK32-LABEL: context_in_func:
9083
; CHECK32: movl %esp, %eax
@@ -96,13 +89,33 @@ define void @write_frame_context(ptr swiftasync %context, ptr %new_context) "fra
9689
}
9790

9891
; CHECK64-LABEL: write_frame_context:
99-
; CHECK64: movq %rbp, [[TMP:%.*]]
100-
; CHECK64: subq $8, [[TMP]]
101-
; CHECK64: movq %rcx, ([[TMP]])
92+
; CHECK64: movq %rcx, (%rsp)
10293

10394
define void @simple_fp_elim(ptr swiftasync %context) "frame-pointer"="non-leaf" {
10495
ret void
10596
}
10697

10798
; CHECK64-LABEL: simple_fp_elim:
10899
; CHECK64-NOT: btsq
100+
101+
define void @manylocals_and_overwritten_context(ptr swiftasync %context, ptr %new_context) "frame-pointer"="all" {
102+
%ptr = call ptr @llvm.swift.async.context.addr()
103+
store ptr %new_context, ptr %ptr
104+
%var1 = alloca i64, i64 1
105+
call void @f(ptr %var1)
106+
%var2 = alloca i64, i64 16
107+
call void @f(ptr %var2)
108+
%ptr2 = call ptr @llvm.swift.async.context.addr()
109+
store ptr %new_context, ptr %ptr2
110+
ret void
111+
}
112+
113+
; CHECK64-LABEL: manylocals_and_overwritten_context:
114+
; CHECK64: pushq %rbp
115+
; CHECK64: subq $184, %rsp
116+
; CHECK64: leaq 128(%rsp), %rbp
117+
; CHECK64: movq %rcx, %rsi
118+
; CHECK64: movq %rcx, 48(%rbp)
119+
; CHECK64: callq f
120+
; CHECK64: callq f
121+
; CHECK64: movq %rsi, 48(%rbp)

0 commit comments

Comments
 (0)