Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Try to make stack coloring work better with unwinding #69

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 95 additions & 7 deletions lib/CodeGen/StackColoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,15 @@ class StackColoring : public MachineFunctionPass {
/// Each bit in the BitVector represents the liveness property
/// for a different stack slot.
struct BlockLifetimeInfo {
/// Which slots BEGINs in each basic block.
/// Which slots BEGIN in this block and survive to its end.
BitVector Begin;
/// Which slots ENDs in each basic block.
/// Which slots BEGIN and END in this block.
BitVector Use;
/// Which slots END in this block.
BitVector End;
/// Which slots are marked as LIVE_IN, coming into each basic block.
/// Which slots are marked as LIVE_IN, coming into this block.
BitVector LiveIn;
/// Which slots are marked as LIVE_OUT, coming out of each basic block.
/// Which slots are marked as LIVE_OUT, coming out of this block.
BitVector LiveOut;
};

Expand All @@ -272,8 +274,11 @@ class StackColoring : public MachineFunctionPass {
/// Maps basic blocks to a serial number.
SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering;

/// Maps liveness intervals for each slot.
/// Maps slots to their activity interval. Outside of this interval, slots
/// values are either dead or `undef` and they will not be written to.
SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals;
/// Maps slots to the set of gen-points of their intervals.
SmallVector<std::unique_ptr<LiveInterval>, 16> IntervalStarts;
/// VNInfo is used for the construction of LiveIntervals.
VNInfo::Allocator VNInfoAllocator;
/// SlotIndex analysis object.
Expand Down Expand Up @@ -401,6 +406,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
const BlockLifetimeInfo &BlockInfo = BI->second;

dumpBV("BEGIN", BlockInfo.Begin);
dumpBV("USE", BlockInfo.Use);
dumpBV("END", BlockInfo.End);
dumpBV("LIVE_IN", BlockInfo.LiveIn);
dumpBV("LIVE_OUT", BlockInfo.LiveOut);
Expand All @@ -418,6 +424,8 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
DEBUG(dbgs() << "Interval[" << I << "]:\n");
DEBUG(Intervals[I]->dump());
DEBUG(dbgs() << "IntervalStarts[" << I << "]:\n");
DEBUG(IntervalStarts[I]->dump());
}
}

Expand Down Expand Up @@ -583,6 +591,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];

BlockInfo.Begin.resize(NumSlot);
BlockInfo.Use.resize(NumSlot);
BlockInfo.End.resize(NumSlot);

SmallVector<int, 4> slots;
Expand All @@ -595,6 +604,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
int Slot = slots[0];
if (BlockInfo.Begin.test(Slot)) {
BlockInfo.Begin.reset(Slot);
BlockInfo.Use.set(Slot);
}
BlockInfo.End.set(Slot);
} else {
Expand All @@ -610,6 +620,9 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
if (BlockInfo.End.test(Slot)) {
BlockInfo.End.reset(Slot);
}
if (BlockInfo.Use.test(Slot)) {
BlockInfo.Use.reset(Slot);
}
BlockInfo.Begin.set(Slot);
}
}
Expand Down Expand Up @@ -745,18 +758,28 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {

assert(Starts[i] && Finishes[i] && "Invalid interval");
VNInfo *ValNum = Intervals[i]->getValNumInfo(0);
VNInfo *ValNumS = IntervalStarts[i]->getValNumInfo(0);
SlotIndex S = Starts[i];
SlotIndex F = Finishes[i];
if (S < F) {
// We have a single consecutive region.
Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum));
// FIXME: stop cargo culting
if (MBBLiveness.Begin.test(i) || MBBLiveness.Use.test(i)) {
IntervalStarts[i]->addSegment(LiveInterval::Segment(S, F, ValNumS));
}
} else {
// We have two non-consecutive regions. This happens when
// LIFETIME_START appears after the LIFETIME_END marker.
SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB);
SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB);
Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum));
Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum));
// FIXME: stop cargo culting
if (MBBLiveness.Begin.test(i) || MBBLiveness.Use.test(i)) {
IntervalStarts[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNumS));
IntervalStarts[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNumS));
}
}
}
}
Expand Down Expand Up @@ -988,6 +1011,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
BasicBlockNumbering.clear();
Markers.clear();
Intervals.clear();
IntervalStarts.clear();
VNInfoAllocator.Reset();

unsigned NumSlots = MFI->getObjectIndexEnd();
Expand Down Expand Up @@ -1025,6 +1049,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
Intervals.push_back(std::move(LI));

// Just cargo culting. Please help me DTRT.
std::unique_ptr<LiveInterval> SI(new LiveInterval(i, 0));
SI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
IntervalStarts.push_back(std::move(SI));

SortedSlots.push_back(i);
}

Expand Down Expand Up @@ -1084,13 +1114,71 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
int FirstSlot = SortedSlots[I];
int SecondSlot = SortedSlots[J];
LiveInterval *First = &*Intervals[FirstSlot];
LiveInterval *FirstS = &*IntervalStarts[FirstSlot];
LiveInterval *Second = &*Intervals[SecondSlot];
LiveInterval *SecondS = &*IntervalStarts[SecondSlot];
assert (!First->empty() && !Second->empty() && "Found an empty range");

// Merge disjoint slots.
if (!First->overlaps(*Second)) {
// Merge disjoint slots. Now, the condition for this is a little bit
// tricky.
//
// The fundamental condition we want to preserve is that each stack
// slot has the correct contents at each point it is live.
//
// We *could* compute liveness using the standard backward dataflow
// algorithm. Unfortunately, that does not give very good results in the
// presence of aliasing, so we have frontends emit `lifetime.start` and
// `lifetime.end` intrinsics that make undesirable accesses UB.
//
// The effect of these intrinsics is as follows:
// 1) at start, each stack-slot is marked as *out-of-scope*, unless no
// lifetime intrinsic refers to that stack slot, in which case
// it is marked as *in-scope*.
// 2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
// the stack slot is overwritten with `undef`.
// 3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
// 4) on function exit, all stack slots are marked as *out-of-scope*.
// 5) the effects of calling `lifetime.start` on an *in-scope* stack-slot,
// or `lifetime.end` on an *out-of-scope* stack-slot, are left unspecified.
// 6) memory accesses to *out-of-scope* stack slots are UB.
// 7) when a stack-slot is marked as *out-of-scope*, all pointers to it
// are invalidated unless it looks like they might be used (?). This
// is used to justify not marking slots as live until the pointer
// to them is used, but I think this should be clarified better.
//
// If we define a slot as *active* at a program point if it either can
// be written to, or if it has a live and non-undef content, then it
// is obvious that slots that are never active together can be merged.
//
// From our rules, we see that *out-of-scope* slots are never *active*,
// and from (7) we see that "non-conservative" slots remain non-*active*
// until their address is taken. Therefore, we can approximate slot activity
// using dataflow.
//
// Now, naively, we might think that we could construct our interference
// graph by propagating `S active` through the CFG for every stack-slot `S`,
// and having `S` and `T` interfere if there is a point in which they are
// both *active*. That is sound, but overly conservative in some important
// cases: it is possible that `S` is active on one predecessor edge and
// `T` is active on another. See PR32488.
//
// If we want to construct the interference graph precisely, we could
// propagate `S active` and `S&T active` predicates through the CFG. That
// would be precise, but requires propagating `O(n^2)` dataflow facts.
//
// Instead, we rely on a little trick: for an `S&T active` predicate to
// start holding, there has to be either
// A) a point in the gen-set of `S active` where `T` is *active*
// B) a point in the gen-set of `T active` where `S` is *active*
// C) a point in the gen-set of both `S active` and `T active`.
//
// Of course, the `S&T active` predicate can be propagated further, but
// it holding at 1 point is enough for us to mark an edge on the interference
// graph. So that's what we do.
if (!First->overlaps(*SecondS) && !FirstS->overlaps(*Second)) {
Changed = true;
First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
FirstS->MergeSegmentsInAsValue(*SecondS, FirstS->getValNumInfo(0));
SlotRemap[SecondSlot] = FirstSlot;
SortedSlots[J] = -1;
DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<<
Expand Down
42 changes: 42 additions & 0 deletions test/CodeGen/X86/StackColoring.ll
Original file line number Diff line number Diff line change
Expand Up @@ -582,12 +582,54 @@ if.end: ; preds = %if.then, %entry
ret i32 %x.addr.0
}


;CHECK-LABEL: pr32488:
;YESCOLOR: subq $256, %rsp
;NOFIRSTUSE: subq $256, %rsp
;NOCOLOR: subq $512, %rsp
define i1 @pr32488(i1, i1)
{
entry-block:
%foo = alloca [32 x i64]
%bar = alloca [32 x i64]
%foo_i8 = bitcast [32 x i64]* %foo to i8*
%bar_i8 = bitcast [32 x i64]* %bar to i8*
br i1 %0, label %if_false, label %if_true
if_false:
call void @llvm.lifetime.start(i64 256, i8* %bar_i8)
call void @baz([32 x i64]* %foo, i32 0)
br i1 %1, label %if_false.1, label %onerr
if_false.1:
call void @llvm.lifetime.end(i64 256, i8* %bar_i8)
br label %merge
if_true:
call void @llvm.lifetime.start(i64 256, i8* %foo_i8)
call void @baz([32 x i64]* %foo, i32 1)
br i1 %1, label %if_true.1, label %onerr
if_true.1:
call void @llvm.lifetime.end(i64 256, i8* %foo_i8)
br label %merge
merge:
ret i1 false
onerr:
call void @llvm.lifetime.end(i64 256, i8* %foo_i8)
call void @llvm.lifetime.end(i64 256, i8* %bar_i8)
call void @destructor()
ret i1 true
}

%Data = type { [32 x i64] }

declare void @destructor()

declare void @inita(i32*)

declare void @initb(i32*,i32*,i32*)

declare void @bar([100 x i32]* , [100 x i32]*) nounwind

declare void @baz([32 x i64]*, i32)

declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind

declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
Expand Down