Skip to content

Commit 23d209f

Browse files
authored
[SPARC] Allow overaligned allocas (#107223)
The SPARC ABI doesn't use stack realignment, so let LLVM know about it in `SparcFrameLowering`. This has the side effect of making all overaligned allocations go through `LowerDYNAMIC_STACKALLOC`, so implement the missing logic for overaligned allocations there too. This stops the SPARC backend from crashing on overaligned `alloca`s and fixes #89569.
1 parent 6927a43 commit 23d209f

11 files changed

+165
-153
lines changed

compiler-rt/test/asan/TestCases/alloca_vla_interact.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
//
44
// REQUIRES: stable-runtime
55

6+
// See https://github.com/llvm/llvm-project/issues/110956
7+
// XFAIL: target=sparc{{.*}}
8+
69
// This testcase checks correct interaction between VLAs and allocas.
710

811
#include <assert.h>

llvm/lib/Target/Sparc/SparcFrameLowering.cpp

+3-39
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ DisableLeafProc("disable-sparc-leaf-proc",
3535
SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST)
3636
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
3737
ST.is64Bit() ? Align(16) : Align(8), 0,
38-
ST.is64Bit() ? Align(16) : Align(8)) {}
38+
ST.is64Bit() ? Align(16) : Align(8),
39+
/*StackRealignable=*/false) {}
3940

4041
void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF,
4142
MachineBasicBlock &MBB,
@@ -97,12 +98,6 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
9798
// Debug location must be unknown since the first debug location is used
9899
// to determine the end of the prologue.
99100
DebugLoc dl;
100-
bool NeedsStackRealignment = RegInfo.shouldRealignStack(MF);
101-
102-
if (NeedsStackRealignment && !RegInfo.canRealignStack(MF))
103-
report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required "
104-
"stack re-alignment, but LLVM couldn't handle it "
105-
"(probably because it has a dynamic alloca).");
106101

107102
// Get the number of bytes to allocate from the FrameInfo
108103
int NumBytes = (int) MFI.getStackSize();
@@ -168,31 +163,6 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
168163
MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA));
169164
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
170165
.addCFIIndex(CFIIndex);
171-
172-
if (NeedsStackRealignment) {
173-
int64_t Bias = Subtarget.getStackPointerBias();
174-
unsigned regUnbiased;
175-
if (Bias) {
176-
// This clobbers G1 which we always know is available here.
177-
regUnbiased = SP::G1;
178-
// add %o6, BIAS, %g1
179-
BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), regUnbiased)
180-
.addReg(SP::O6).addImm(Bias);
181-
} else
182-
regUnbiased = SP::O6;
183-
184-
// andn %regUnbiased, MaxAlign-1, %regUnbiased
185-
Align MaxAlign = MFI.getMaxAlign();
186-
BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), regUnbiased)
187-
.addReg(regUnbiased)
188-
.addImm(MaxAlign.value() - 1U);
189-
190-
if (Bias) {
191-
// add %g1, -BIAS, %o6
192-
BuildMI(MBB, MBBI, dl, TII.get(SP::ADDri), SP::O6)
193-
.addReg(regUnbiased).addImm(-Bias);
194-
}
195-
}
196166
}
197167

198168
MachineBasicBlock::iterator SparcFrameLowering::
@@ -257,8 +227,7 @@ bool SparcFrameLowering::hasFPImpl(const MachineFunction &MF) const {
257227

258228
const MachineFrameInfo &MFI = MF.getFrameInfo();
259229
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
260-
RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
261-
MFI.isFrameAddressTaken();
230+
MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken();
262231
}
263232

264233
StackOffset
@@ -284,11 +253,6 @@ SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
284253
} else if (isFixed) {
285254
// Otherwise, argument access should always use %fp.
286255
UseFP = true;
287-
} else if (RegInfo->hasStackRealignment(MF)) {
288-
// If there is dynamic stack realignment, all local object
289-
// references need to be via %sp, to take account of the
290-
// re-alignment.
291-
UseFP = false;
292256
} else {
293257
// Finally, default to using %fp.
294258
UseFP = true;

llvm/lib/Target/Sparc/SparcISelLowering.cpp

+30-23
Original file line numberDiff line numberDiff line change
@@ -2762,22 +2762,16 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) {
27622762

27632763
static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
27642764
const SparcSubtarget *Subtarget) {
2765-
SDValue Chain = Op.getOperand(0); // Legalize the chain.
2766-
SDValue Size = Op.getOperand(1); // Legalize the size.
2767-
MaybeAlign Alignment =
2768-
cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
2769-
Align StackAlign = Subtarget->getFrameLowering()->getStackAlign();
2765+
SDValue Chain = Op.getOperand(0);
2766+
SDValue Size = Op.getOperand(1);
2767+
SDValue Alignment = Op.getOperand(2);
2768+
MaybeAlign MaybeAlignment =
2769+
cast<ConstantSDNode>(Alignment)->getMaybeAlignValue();
27702770
EVT VT = Size->getValueType(0);
27712771
SDLoc dl(Op);
27722772

2773-
// TODO: implement over-aligned alloca. (Note: also implies
2774-
// supporting support for overaligned function frames + dynamic
2775-
// allocations, at all, which currently isn't supported)
2776-
if (Alignment && *Alignment > StackAlign) {
2777-
const MachineFunction &MF = DAG.getMachineFunction();
2778-
report_fatal_error("Function \"" + Twine(MF.getName()) + "\": "
2779-
"over-aligned dynamic alloca not supported.");
2780-
}
2773+
unsigned SPReg = SP::O6;
2774+
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
27812775

27822776
// The resultant pointer needs to be above the register spill area
27832777
// at the bottom of the stack.
@@ -2811,16 +2805,29 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
28112805
regSpillArea = 96;
28122806
}
28132807

2814-
unsigned SPReg = SP::O6;
2815-
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
2816-
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
2817-
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
2818-
2819-
regSpillArea += Subtarget->getStackPointerBias();
2820-
2821-
SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
2822-
DAG.getConstant(regSpillArea, dl, VT));
2823-
SDValue Ops[2] = { NewVal, Chain };
2808+
int64_t Bias = Subtarget->getStackPointerBias();
2809+
2810+
// Debias and increment SP past the reserved spill area.
2811+
// We need the SP to point to the first usable region before calculating
2812+
// anything to prevent any of the pointers from becoming out of alignment when
2813+
// we rebias the SP later on.
2814+
SDValue StartOfUsableStack = DAG.getNode(
2815+
ISD::ADD, dl, VT, SP, DAG.getConstant(regSpillArea + Bias, dl, VT));
2816+
SDValue AllocatedPtr =
2817+
DAG.getNode(ISD::SUB, dl, VT, StartOfUsableStack, Size);
2818+
2819+
bool IsOveraligned = MaybeAlignment.has_value();
2820+
SDValue AlignedPtr =
2821+
IsOveraligned
2822+
? DAG.getNode(ISD::AND, dl, VT, AllocatedPtr,
2823+
DAG.getConstant(-MaybeAlignment->value(), dl, VT))
2824+
: AllocatedPtr;
2825+
2826+
// Now that we are done, restore the bias and reserved spill area.
2827+
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, AlignedPtr,
2828+
DAG.getConstant(regSpillArea + Bias, dl, VT));
2829+
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP);
2830+
SDValue Ops[2] = {AlignedPtr, Chain};
28242831
return DAG.getMergeValues(Ops, dl);
28252832
}
28262833

llvm/lib/Target/Sparc/SparcRegisterInfo.cpp

-23
Original file line numberDiff line numberDiff line change
@@ -226,26 +226,3 @@ SparcRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
226226
Register SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
227227
return SP::I6;
228228
}
229-
230-
// Sparc has no architectural need for stack realignment support,
231-
// except that LLVM unfortunately currently implements overaligned
232-
// stack objects by depending upon stack realignment support.
233-
// If that ever changes, this can probably be deleted.
234-
bool SparcRegisterInfo::canRealignStack(const MachineFunction &MF) const {
235-
if (!TargetRegisterInfo::canRealignStack(MF))
236-
return false;
237-
238-
// Sparc always has a fixed frame pointer register, so don't need to
239-
// worry about needing to reserve it. [even if we don't have a frame
240-
// pointer for our frame, it still cannot be used for other things,
241-
// or register window traps will be SADNESS.]
242-
243-
// If there's a reserved call frame, we can use SP to access locals.
244-
if (getFrameLowering(MF)->hasReservedCallFrame(MF))
245-
return true;
246-
247-
// Otherwise, we'd need a base pointer, but those aren't implemented
248-
// for SPARC at the moment.
249-
250-
return false;
251-
}

llvm/lib/Target/Sparc/SparcRegisterInfo.h

-3
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
4040
RegScavenger *RS = nullptr) const override;
4141

4242
Register getFrameRegister(const MachineFunction &MF) const override;
43-
44-
bool canRealignStack(const MachineFunction &MF) const override;
45-
4643
};
4744

4845
} // end namespace llvm

llvm/test/CodeGen/Generic/ForceStackAlign.ll

-3
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55
; CHECK-LABEL: @f
66
; CHECK-LABEL: @g
77

8-
; Stack realignment not supported.
9-
; XFAIL: target=sparc{{.*}}
10-
118
; NVPTX can only select dynamic_stackalloc on sm_52+ and with ptx73+
129
; XFAIL: target=nvptx{{.*}}
1310

llvm/test/CodeGen/SPARC/2013-05-17-CallFrame.ll

+3-11
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,10 @@
55
; (this should ideally be doing "add 4+7; and -8", instead of
66
; "add 7; and -8; add 8"; see comments in LowerDYNAMIC_STACKALLOC)
77

8-
define void @variable_alloca_with_adj_call_stack(i32 %num) {
8+
define void @variable_alloca_with_adj_call_stack(i32 %num) nounwind {
99
; V8-LABEL: variable_alloca_with_adj_call_stack:
10-
; V8: .cfi_startproc
11-
; V8-NEXT: ! %bb.0: ! %entry
10+
; V8: ! %bb.0: ! %entry
1211
; V8-NEXT: save %sp, -96, %sp
13-
; V8-NEXT: .cfi_def_cfa_register %fp
14-
; V8-NEXT: .cfi_window_save
15-
; V8-NEXT: .cfi_register %o7, %i7
1612
; V8-NEXT: add %i0, 7, %i0
1713
; V8-NEXT: and %i0, -8, %i0
1814
; V8-NEXT: sub %sp, %i0, %i0
@@ -34,12 +30,8 @@ define void @variable_alloca_with_adj_call_stack(i32 %num) {
3430
; V8-NEXT: restore
3531
;
3632
; SPARC64-LABEL: variable_alloca_with_adj_call_stack:
37-
; SPARC64: .cfi_startproc
38-
; SPARC64-NEXT: ! %bb.0: ! %entry
33+
; SPARC64: ! %bb.0: ! %entry
3934
; SPARC64-NEXT: save %sp, -128, %sp
40-
; SPARC64-NEXT: .cfi_def_cfa_register %fp
41-
; SPARC64-NEXT: .cfi_window_save
42-
; SPARC64-NEXT: .cfi_register %o7, %i7
4335
; SPARC64-NEXT: srl %i0, 0, %i0
4436
; SPARC64-NEXT: add %i0, 15, %i0
4537
; SPARC64-NEXT: sethi 4194303, %i1
+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc -march=sparc < %s | FileCheck %s --check-prefixes=CHECK32
3+
; RUN: llc -march=sparcv9 < %s | FileCheck %s --check-prefixes=CHECK64
4+
5+
define void @variable_alloca_with_overalignment(i32 %num) nounwind {
6+
; CHECK32-LABEL: variable_alloca_with_overalignment:
7+
; CHECK32: ! %bb.0:
8+
; CHECK32-NEXT: save %sp, -96, %sp
9+
; CHECK32-NEXT: add %sp, 80, %i1
10+
; CHECK32-NEXT: and %i1, -64, %o0
11+
; CHECK32-NEXT: add %o0, -96, %sp
12+
; CHECK32-NEXT: add %i0, 7, %i0
13+
; CHECK32-NEXT: and %i0, -8, %i0
14+
; CHECK32-NEXT: sub %sp, %i0, %i0
15+
; CHECK32-NEXT: add %i0, -8, %sp
16+
; CHECK32-NEXT: call foo
17+
; CHECK32-NEXT: add %i0, 88, %o1
18+
; CHECK32-NEXT: ret
19+
; CHECK32-NEXT: restore
20+
;
21+
; CHECK64-LABEL: variable_alloca_with_overalignment:
22+
; CHECK64: ! %bb.0:
23+
; CHECK64-NEXT: save %sp, -128, %sp
24+
; CHECK64-NEXT: add %sp, 2159, %i1
25+
; CHECK64-NEXT: and %i1, -64, %o0
26+
; CHECK64-NEXT: add %o0, -2175, %sp
27+
; CHECK64-NEXT: srl %i0, 0, %i0
28+
; CHECK64-NEXT: add %i0, 15, %i0
29+
; CHECK64-NEXT: sethi 4194303, %i1
30+
; CHECK64-NEXT: or %i1, 1008, %i1
31+
; CHECK64-NEXT: sethi 0, %i2
32+
; CHECK64-NEXT: or %i2, 1, %i2
33+
; CHECK64-NEXT: sllx %i2, 32, %i2
34+
; CHECK64-NEXT: or %i2, %i1, %i1
35+
; CHECK64-NEXT: and %i0, %i1, %i0
36+
; CHECK64-NEXT: sub %sp, %i0, %i0
37+
; CHECK64-NEXT: add %i0, 2175, %o1
38+
; CHECK64-NEXT: mov %i0, %sp
39+
; CHECK64-NEXT: call foo
40+
; CHECK64-NEXT: add %sp, -48, %sp
41+
; CHECK64-NEXT: add %sp, 48, %sp
42+
; CHECK64-NEXT: ret
43+
; CHECK64-NEXT: restore
44+
%aligned = alloca i32, align 64
45+
%var_size = alloca i8, i32 %num, align 4
46+
call void @foo(ptr %aligned, ptr %var_size)
47+
ret void
48+
}
49+
50+
;; Same but with the alloca itself overaligned
51+
define void @variable_alloca_with_overalignment_2(i32 %num) nounwind {
52+
; CHECK32-LABEL: variable_alloca_with_overalignment_2:
53+
; CHECK32: ! %bb.0:
54+
; CHECK32-NEXT: save %sp, -96, %sp
55+
; CHECK32-NEXT: add %i0, 7, %i0
56+
; CHECK32-NEXT: and %i0, -8, %i0
57+
; CHECK32-NEXT: sub %sp, %i0, %i0
58+
; CHECK32-NEXT: add %i0, 88, %i0
59+
; CHECK32-NEXT: and %i0, -64, %o1
60+
; CHECK32-NEXT: add %o1, -96, %sp
61+
; CHECK32-NEXT: call foo
62+
; CHECK32-NEXT: mov %g0, %o0
63+
; CHECK32-NEXT: ret
64+
; CHECK32-NEXT: restore
65+
;
66+
; CHECK64-LABEL: variable_alloca_with_overalignment_2:
67+
; CHECK64: ! %bb.0:
68+
; CHECK64-NEXT: save %sp, -128, %sp
69+
; CHECK64-NEXT: srl %i0, 0, %i0
70+
; CHECK64-NEXT: add %i0, 15, %i0
71+
; CHECK64-NEXT: sethi 4194303, %i1
72+
; CHECK64-NEXT: or %i1, 1008, %i1
73+
; CHECK64-NEXT: sethi 0, %i2
74+
; CHECK64-NEXT: or %i2, 1, %i2
75+
; CHECK64-NEXT: sllx %i2, 32, %i2
76+
; CHECK64-NEXT: or %i2, %i1, %i1
77+
; CHECK64-NEXT: and %i0, %i1, %i0
78+
; CHECK64-NEXT: sub %sp, %i0, %i0
79+
; CHECK64-NEXT: add %i0, 2175, %i0
80+
; CHECK64-NEXT: and %i0, -64, %o1
81+
; CHECK64-NEXT: add %o1, -2175, %sp
82+
; CHECK64-NEXT: add %sp, -48, %sp
83+
; CHECK64-NEXT: call foo
84+
; CHECK64-NEXT: mov %g0, %o0
85+
; CHECK64-NEXT: add %sp, 48, %sp
86+
; CHECK64-NEXT: ret
87+
; CHECK64-NEXT: restore
88+
%var_size = alloca i8, i32 %num, align 64
89+
call void @foo(ptr null, ptr %var_size)
90+
ret void
91+
}
92+
93+
declare void @foo(ptr, ptr);

llvm/test/CodeGen/SPARC/fail-alloca-align.ll

-23
This file was deleted.

llvm/test/CodeGen/SPARC/fp128.ll

+4-12
Original file line numberDiff line numberDiff line change
@@ -54,18 +54,10 @@ entry:
5454

5555
; CHECK-LABEL: f128_spill_large:
5656
; CHECK: sethi 4, %g1
57-
; CHECK: sethi 4, %g1
58-
; CHECK-NEXT: add %g1, %sp, %g1
59-
; CHECK-NEXT: std %f{{.+}}, [%g1]
60-
; CHECK: sethi 4, %g1
61-
; CHECK-NEXT: add %g1, %sp, %g1
62-
; CHECK-NEXT: std %f{{.+}}, [%g1+8]
63-
; CHECK: sethi 4, %g1
64-
; CHECK-NEXT: add %g1, %sp, %g1
65-
; CHECK-NEXT: ldd [%g1], %f{{.+}}
66-
; CHECK: sethi 4, %g1
67-
; CHECK-NEXT: add %g1, %sp, %g1
68-
; CHECK-NEXT: ldd [%g1+8], %f{{.+}}
57+
; CHECK: std %f{{.+}}, [%fp+-16]
58+
; CHECK-NEXT: std %f{{.+}}, [%fp+-8]
59+
; CHECK: ldd [%fp+-16], %f{{.+}}
60+
; CHECK-NEXT: ldd [%fp+-8], %f{{.+}}
6961

7062
define void @f128_spill_large(ptr noalias sret(<251 x fp128>) %scalar.result, ptr byval(<251 x fp128>) %a) {
7163
entry:

0 commit comments

Comments
 (0)