Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 03c73da

Browse files
committed
StackColoring: smarter check for slot overlap
The old check for slot overlap treated 2 slots `S` and `T` as overlapping if there existed a CFG node in which both of the slots could possibly be active. That is overly conservative and caused stack blowups in Rust programs. Instead, check whether there is a single CFG node in which both of the slots are possibly active *together*.
1 parent 2e951c3 commit 03c73da

File tree

2 files changed

+137
-7
lines changed

2 files changed

+137
-7
lines changed

lib/CodeGen/StackColoring.cpp

Lines changed: 95 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -253,13 +253,15 @@ class StackColoring : public MachineFunctionPass {
253253
/// Each bit in the BitVector represents the liveness property
254254
/// for a different stack slot.
255255
struct BlockLifetimeInfo {
256-
/// Which slots BEGINs in each basic block.
256+
/// Which slots BEGIN in this block and survive to its end.
257257
BitVector Begin;
258-
/// Which slots ENDs in each basic block.
258+
/// Which slots BEGIN and END in this block.
259+
BitVector Use;
260+
/// Which slots END in this block.
259261
BitVector End;
260-
/// Which slots are marked as LIVE_IN, coming into each basic block.
262+
/// Which slots are marked as LIVE_IN, coming into this block.
261263
BitVector LiveIn;
262-
/// Which slots are marked as LIVE_OUT, coming out of each basic block.
264+
/// Which slots are marked as LIVE_OUT, coming out of this block.
263265
BitVector LiveOut;
264266
};
265267

@@ -272,8 +274,11 @@ class StackColoring : public MachineFunctionPass {
272274
/// Maps basic blocks to a serial number.
273275
SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;
274276

275-
/// Maps liveness intervals for each slot.
277+
/// Maps slots to their activity interval. Outside of this interval, slots
278+
/// values are either dead or `undef` and they will not be written to.
276279
SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
280+
/// Maps slots to the set of gen-points of their intervals.
281+
SmallVector<std::unique_ptr<LiveInterval>, 16> IntervalStarts;
277282
/// VNInfo is used for the construction of LiveIntervals.
278283
VNInfo::Allocator VNInfoAllocator;
279284
/// SlotIndex analysis object.
@@ -401,6 +406,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
401406
const BlockLifetimeInfo &BlockInfo = BI->second;
402407

403408
dumpBV("BEGIN", BlockInfo.Begin);
409+
dumpBV("USE", BlockInfo.Use);
404410
dumpBV("END", BlockInfo.End);
405411
dumpBV("LIVE_IN", BlockInfo.LiveIn);
406412
dumpBV("LIVE_OUT", BlockInfo.LiveOut);
@@ -418,6 +424,8 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
418424
for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
419425
DEBUG(dbgs() << "Interval[" << I << "]:\n");
420426
DEBUG(Intervals[I]->dump());
427+
DEBUG(dbgs() << "IntervalStarts[" << I << "]:\n");
428+
DEBUG(IntervalStarts[I]->dump());
421429
}
422430
}
423431

@@ -583,6 +591,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
583591
BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
584592

585593
BlockInfo.Begin.resize(NumSlot);
594+
BlockInfo.Use.resize(NumSlot);
586595
BlockInfo.End.resize(NumSlot);
587596

588597
SmallVector<int, 4> slots;
@@ -595,6 +604,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
595604
int Slot = slots[0];
596605
if (BlockInfo.Begin.test(Slot)) {
597606
BlockInfo.Begin.reset(Slot);
607+
BlockInfo.Use.set(Slot);
598608
}
599609
BlockInfo.End.set(Slot);
600610
} else {
@@ -610,6 +620,9 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
610620
if (BlockInfo.End.test(Slot)) {
611621
BlockInfo.End.reset(Slot);
612622
}
623+
if (BlockInfo.Use.test(Slot)) {
624+
BlockInfo.Use.reset(Slot);
625+
}
613626
BlockInfo.Begin.set(Slot);
614627
}
615628
}
@@ -745,18 +758,28 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
745758

746759
assert(Starts[i] && Finishes[i] && "Invalid interval");
747760
VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
761+
VNInfo *ValNumS = IntervalStarts[i]->getValNumInfo(0);
748762
SlotIndex S = Starts[i];
749763
SlotIndex F = Finishes[i];
750764
if (S < F) {
751765
// We have a single consecutive region.
752766
Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
767+
// FIXME: stop cargo culting
768+
if (MBBLiveness.Begin.test(i) || MBBLiveness.Use.test(i)) {
769+
IntervalStarts[i]->addSegment(LiveInterval::Segment(S, F, ValNumS));
770+
}
753771
} else {
754772
// We have two non-consecutive regions. This happens when
755773
// LIFETIME_START appears after the LIFETIME_END marker.
756774
SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
757775
SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
758776
Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
759777
Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
778+
// FIXME: stop cargo culting
779+
if (MBBLiveness.Begin.test(i) || MBBLiveness.Use.test(i)) {
780+
IntervalStarts[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNumS));
781+
IntervalStarts[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNumS));
782+
}
760783
}
761784
}
762785
}
@@ -988,6 +1011,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
9881011
BasicBlockNumbering.clear();
9891012
Markers.clear();
9901013
Intervals.clear();
1014+
IntervalStarts.clear();
9911015
VNInfoAllocator.Reset();
9921016

9931017
unsigned NumSlots = MFI->getObjectIndexEnd();
@@ -1025,6 +1049,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
10251049
std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
10261050
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
10271051
Intervals.push_back(std::move(LI));
1052+
1053+
// Just cargo culting. Please help me DTRT.
1054+
std::unique_ptr<LiveInterval> SI(new LiveInterval(i, 0));
1055+
SI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1056+
IntervalStarts.push_back(std::move(SI));
1057+
10281058
SortedSlots.push_back(i);
10291059
}
10301060

@@ -1084,13 +1114,71 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
10841114
int FirstSlot = SortedSlots[I];
10851115
int SecondSlot = SortedSlots[J];
10861116
LiveInterval *First = &*Intervals[FirstSlot];
1117+
LiveInterval *FirstS = &*IntervalStarts[FirstSlot];
10871118
LiveInterval *Second = &*Intervals[SecondSlot];
1119+
LiveInterval *SecondS = &*IntervalStarts[SecondSlot];
10881120
assert (!First->empty() && !Second->empty() && "Found an empty range");
10891121

1090-
// Merge disjoint slots.
1091-
if (!First->overlaps(*Second)) {
1122+
// Merge disjoint slots. Now, the condition for this is a little bit
1123+
// tricky.
1124+
//
1125+
// The fundamental condition we want to preserve is that each stack
1126+
// slot has the correct contents at each point it is live.
1127+
//
1128+
// We *could* compute liveness using the standard backward dataflow
1129+
// algorithm. Unfortunately, that does not give very good results in the
1130+
// presence of aliasing, so we have frontends emit `lifetime.start` and
1131+
// `lifetime.end` intrinsics that make undesirable accesses UB.
1132+
//
1133+
// The effect of these intrinsics is as follows:
1134+
// 1) at start, each stack-slot is marked as *out-of-scope*, unless no
1135+
// lifetime intrinsic refers to that stack slot, in which case
1136+
// it is marked as *in-scope*.
1137+
// 2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
1138+
// the stack slot is overwritten with `undef`.
1139+
// 3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
1140+
// 4) on function exit, all stack slots are marked as *out-of-scope*.
1141+
// 5) the effects of calling `lifetime.start` on an *in-scope* stack-slot,
1142+
// or `lifetime.end` on an *out-of-scope* stack-slot, are left unspecified.
1143+
// 6) memory accesses to *out-of-scope* stack slots are UB.
1144+
// 7) when a stack-slot is marked as *out-of-scope*, all pointers to it
1145+
// are invalidated unless it looks like they might be used (?). This
1146+
// is used to justify not marking slots as live until the pointer
1147+
// to them is used, but I think this should be clarified better.
1148+
//
1149+
// If we define a slot as *active* at a program point if it either can
1150+
// be written to, or if it has a live and non-undef content, then it
1151+
// is obvious that slots that are never active together can be merged.
1152+
//
1153+
// From our rules, we see that *out-of-scope* slots are never *active*,
1154+
// and from (7) we see that "non-conservative" slots remain non-*active*
1155+
// until their address is taken. Therefore, we can approximate slot activity
1156+
// using dataflow.
1157+
//
1158+
// Now, naively, we might think that we could construct our interference
1159+
// graph by propagating `S active` through the CFG for every stack-slot `S`,
1160+
// and having `S` and `T` interfere if there is a point in which they are
1161+
// both *active*. That is sound, but overly conservative in some important
1162+
// cases: it is possible that `S` is active on one predecessor edge and
1163+
// `T` is active on another. See PR32488.
1164+
//
1165+
// If we want to construct the interference graph precisely, we could
1166+
// propagate `S active` and `S&T active` predicates through the CFG. That
1167+
// would be precise, but requires propagating `O(n^2)` dataflow facts.
1168+
//
1169+
// Instead, we rely on a little trick: for an `S&T active` predicate to
1170+
// start holding, there has to be either
1171+
// A) a point in the gen-set of `S active` where `T` is *active*
1172+
// B) a point in the gen-set of `T active` where `S` is *active*
1173+
// C) a point in the gen-set of both `S active` and `T active`.
1174+
//
1175+
// Of course, the `S&T active` predicate can be propagated further, but
1176+
// it holding at 1 point is enough for us to mark an edge on the interference
1177+
// graph. So that's what we do.
1178+
if (!First->overlaps(*SecondS) && !FirstS->overlaps(*Second)) {
10921179
Changed = true;
10931180
First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1181+
FirstS->MergeSegmentsInAsValue(*SecondS, FirstS->getValNumInfo(0));
10941182
SlotRemap[SecondSlot] = FirstSlot;
10951183
SortedSlots[J] = -1;
10961184
DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<

test/CodeGen/X86/StackColoring.ll

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,12 +582,54 @@ if.end: ; preds = %if.then, %entry
582582
ret i32 %x.addr.0
583583
}
584584

585+
586+
;CHECK-LABEL: pr32488:
587+
;YESCOLOR: subq $256, %rsp
588+
;NOFIRSTUSE: subq $256, %rsp
589+
;NOCOLOR: subq $512, %rsp
590+
define i1 @pr32488(i1, i1)
591+
{
592+
entry-block:
593+
%foo = alloca [32 x i64]
594+
%bar = alloca [32 x i64]
595+
%foo_i8 = bitcast [32 x i64]* %foo to i8*
596+
%bar_i8 = bitcast [32 x i64]* %bar to i8*
597+
br i1 %0, label %if_false, label %if_true
598+
if_false:
599+
call void @llvm.lifetime.start(i64 256, i8* %bar_i8)
600+
call void @baz([32 x i64]* %foo, i32 0)
601+
br i1 %1, label %if_false.1, label %onerr
602+
if_false.1:
603+
call void @llvm.lifetime.end(i64 256, i8* %bar_i8)
604+
br label %merge
605+
if_true:
606+
call void @llvm.lifetime.start(i64 256, i8* %foo_i8)
607+
call void @baz([32 x i64]* %foo, i32 1)
608+
br i1 %1, label %if_true.1, label %onerr
609+
if_true.1:
610+
call void @llvm.lifetime.end(i64 256, i8* %foo_i8)
611+
br label %merge
612+
merge:
613+
ret i1 false
614+
onerr:
615+
call void @llvm.lifetime.end(i64 256, i8* %foo_i8)
616+
call void @llvm.lifetime.end(i64 256, i8* %bar_i8)
617+
call void @destructor()
618+
ret i1 true
619+
}
620+
621+
%Data = type { [32 x i64] }
622+
623+
declare void @destructor()
624+
585625
declare void @inita(i32*)
586626

587627
declare void @initb(i32*,i32*,i32*)
588628

589629
declare void @bar([100 x i32]* , [100 x i32]*) nounwind
590630

631+
declare void @baz([32 x i64]*, i32)
632+
591633
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
592634

593635
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind

0 commit comments

Comments
 (0)