@@ -253,13 +253,15 @@ class StackColoring : public MachineFunctionPass {
253
253
// / Each bit in the BitVector represents the liveness property
254
254
// / for a different stack slot.
255
255
struct BlockLifetimeInfo {
256
- // / Which slots BEGINs in each basic block.
256
+ /// Which slots BEGIN in this block and survive to its end.
257
257
BitVector Begin;
258
- // / Which slots ENDs in each basic block.
258
+ /// Which slots BEGIN and then END within this block.
259
+ BitVector Use;
260
+ // / Which slots END in this block.
259
261
BitVector End;
260
- // / Which slots are marked as LIVE_IN, coming into each basic block.
262
+ // / Which slots are marked as LIVE_IN, coming into this block.
261
263
BitVector LiveIn;
262
- // / Which slots are marked as LIVE_OUT, coming out of each basic block.
264
+ // / Which slots are marked as LIVE_OUT, coming out of this block.
263
265
BitVector LiveOut;
264
266
};
265
267
@@ -272,8 +274,11 @@ class StackColoring : public MachineFunctionPass {
272
274
// / Maps basic blocks to a serial number.
273
275
SmallVector<const MachineBasicBlock*, 8 > BasicBlockNumbering;
274
276
275
- // / Maps liveness intervals for each slot.
277
+ /// Maps slots to their activity interval. Outside of this interval, a slot's
278
+ /// value is either dead or `undef`, and the slot will not be written to.
276
279
SmallVector<std::unique_ptr<LiveInterval>, 16 > Intervals;
280
+ // / Maps slots to the set of gen-points of their intervals.
281
+ SmallVector<std::unique_ptr<LiveInterval>, 16 > IntervalStarts;
277
282
// / VNInfo is used for the construction of LiveIntervals.
278
283
VNInfo::Allocator VNInfoAllocator;
279
284
// / SlotIndex analysis object.
@@ -401,6 +406,7 @@ LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
401
406
const BlockLifetimeInfo &BlockInfo = BI->second ;
402
407
403
408
dumpBV (" BEGIN" , BlockInfo.Begin );
409
+ dumpBV (" USE" , BlockInfo.Use );
404
410
dumpBV (" END" , BlockInfo.End );
405
411
dumpBV (" LIVE_IN" , BlockInfo.LiveIn );
406
412
dumpBV (" LIVE_OUT" , BlockInfo.LiveOut );
@@ -418,6 +424,8 @@ LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
418
424
for (unsigned I = 0 , E = Intervals.size (); I != E; ++I) {
419
425
DEBUG (dbgs () << " Interval[" << I << " ]:\n " );
420
426
DEBUG (Intervals[I]->dump ());
427
+ DEBUG (dbgs () << " IntervalStarts[" << I << " ]:\n " );
428
+ DEBUG (IntervalStarts[I]->dump ());
421
429
}
422
430
}
423
431
@@ -583,6 +591,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
583
591
BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
584
592
585
593
BlockInfo.Begin .resize (NumSlot);
594
+ BlockInfo.Use .resize (NumSlot);
586
595
BlockInfo.End .resize (NumSlot);
587
596
588
597
SmallVector<int , 4 > slots;
@@ -595,6 +604,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
595
604
int Slot = slots[0 ];
596
605
if (BlockInfo.Begin .test (Slot)) {
597
606
BlockInfo.Begin .reset (Slot);
607
+ BlockInfo.Use .set (Slot);
598
608
}
599
609
BlockInfo.End .set (Slot);
600
610
} else {
@@ -610,6 +620,9 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot)
610
620
if (BlockInfo.End .test (Slot)) {
611
621
BlockInfo.End .reset (Slot);
612
622
}
623
+ if (BlockInfo.Use .test (Slot)) {
624
+ BlockInfo.Use .reset (Slot);
625
+ }
613
626
BlockInfo.Begin .set (Slot);
614
627
}
615
628
}
@@ -745,18 +758,28 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
745
758
746
759
assert (Starts[i] && Finishes[i] && " Invalid interval" );
747
760
VNInfo *ValNum = Intervals[i]->getValNumInfo (0 );
761
+ VNInfo *ValNumS = IntervalStarts[i]->getValNumInfo (0 );
748
762
SlotIndex S = Starts[i];
749
763
SlotIndex F = Finishes[i];
750
764
if (S < F) {
751
765
// We have a single consecutive region.
752
766
Intervals[i]->addSegment (LiveInterval::Segment (S, F, ValNum));
767
+ // FIXME: stop cargo culting
768
+ if (MBBLiveness.Begin .test (i) || MBBLiveness.Use .test (i)) {
769
+ IntervalStarts[i]->addSegment (LiveInterval::Segment (S, F, ValNumS));
770
+ }
753
771
} else {
754
772
// We have two non-consecutive regions. This happens when
755
773
// LIFETIME_START appears after the LIFETIME_END marker.
756
774
SlotIndex NewStart = Indexes->getMBBStartIdx (&MBB);
757
775
SlotIndex NewFin = Indexes->getMBBEndIdx (&MBB);
758
776
Intervals[i]->addSegment (LiveInterval::Segment (NewStart, F, ValNum));
759
777
Intervals[i]->addSegment (LiveInterval::Segment (S, NewFin, ValNum));
778
+ // FIXME: stop cargo culting
779
+ if (MBBLiveness.Begin .test (i) || MBBLiveness.Use .test (i)) {
780
+ IntervalStarts[i]->addSegment (LiveInterval::Segment (NewStart, F, ValNumS));
781
+ IntervalStarts[i]->addSegment (LiveInterval::Segment (S, NewFin, ValNumS));
782
+ }
760
783
}
761
784
}
762
785
}
@@ -988,6 +1011,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
988
1011
BasicBlockNumbering.clear ();
989
1012
Markers.clear ();
990
1013
Intervals.clear ();
1014
+ IntervalStarts.clear ();
991
1015
VNInfoAllocator.Reset ();
992
1016
993
1017
unsigned NumSlots = MFI->getObjectIndexEnd ();
@@ -1025,6 +1049,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1025
1049
std::unique_ptr<LiveInterval> LI (new LiveInterval (i, 0 ));
1026
1050
LI->getNextValue (Indexes->getZeroIndex (), VNInfoAllocator);
1027
1051
Intervals.push_back (std::move (LI));
1052
+
1053
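+ // FIXME: This was copied from the Intervals setup above without being fully understood; help is wanted to verify that it is the right way to initialize IntervalStarts.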
1054
+ std::unique_ptr<LiveInterval> SI (new LiveInterval (i, 0 ));
1055
+ SI->getNextValue (Indexes->getZeroIndex (), VNInfoAllocator);
1056
+ IntervalStarts.push_back (std::move (SI));
1057
+
1028
1058
SortedSlots.push_back (i);
1029
1059
}
1030
1060
@@ -1084,13 +1114,71 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
1084
1114
int FirstSlot = SortedSlots[I];
1085
1115
int SecondSlot = SortedSlots[J];
1086
1116
LiveInterval *First = &*Intervals[FirstSlot];
1117
+ LiveInterval *FirstS = &*IntervalStarts[FirstSlot];
1087
1118
LiveInterval *Second = &*Intervals[SecondSlot];
1119
+ LiveInterval *SecondS = &*IntervalStarts[SecondSlot];
1088
1120
assert (!First->empty () && !Second->empty () && " Found an empty range" );
1089
1121
1090
- // Merge disjoint slots.
1091
- if (!First->overlaps (*Second)) {
1122
+ // Merge disjoint slots. Now, the condition for this is a little bit
1123
+ // tricky.
1124
+ //
1125
+ // The fundamental condition we want to preserve is that each stack
1126
+ // slot has the correct contents at each point it is live.
1127
+ //
1128
+ // We *could* compute liveness using the standard backward dataflow
1129
+ // algorithm. Unfortunately, that does not give very good results in the
1130
+ // presence of aliasing, so we have frontends emit `lifetime.start` and
1131
+ // `lifetime.end` intrinsics that make undesirable accesses UB.
1132
+ //
1133
+ // The effect of these intrinsics is as follows:
1134
+ // 1) at function entry, each stack-slot is marked as *out-of-scope*, unless
1135
+ // no lifetime intrinsic refers to that stack slot, in which case
1136
+ // it is marked as *in-scope*.
1137
+ // 2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
1138
+ // the stack slot is overwritten with `undef`.
1139
+ // 3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
1140
+ // 4) on function exit, all stack slots are marked as *out-of-scope*.
1141
+ // 5) the effects of calling `lifetime.start` on an *in-scope* stack-slot,
1142
+ // or `lifetime.end` on an *out-of-scope* stack-slot, are left unspecified.
1143
+ // 6) memory accesses to *out-of-scope* stack slots are UB.
1144
+ // 7) when a stack-slot is marked as *out-of-scope*, all pointers to it
1145
+ // are invalidated unless it looks like they might be used (?). This
1146
+ // is used to justify not marking slots as live until the pointer
1147
+ // to them is used, but I think this should be clarified better.
1148
+ //
1149
+ // If we define a slot as *active* at a program point when it either can
1150
+ // be written to or has live, non-undef content, then it is obvious
1151
+ // that slots that are never active together can be merged.
1152
+ //
1153
+ // From our rules, we see that *out-of-scope* slots are never *active*,
1154
+ // and from (7) we see that "non-conservative" slots remain non-*active*
1155
+ // until their address is taken. Therefore, we can approximate slot activity
1156
+ // using dataflow.
1157
+ //
1158
+ // Now, naively, we might think that we could construct our interference
1159
+ // graph by propagating `S active` through the CFG for every stack-slot `S`,
1160
+ // and having `S` and `T` interfere if there is a point in which they are
1161
+ // both *active*. That is sound, but overly conservative in some important
1162
+ // cases: it is possible that `S` is active on one predecessor edge and
1163
+ // `T` is active on another. See PR32488.
1164
+ //
1165
+ // If we want to construct the interference graph precisely, we could
1166
+ // propagate `S active` and `S&T active` predicates through the CFG. That
1167
+ // would be precise, but requires propagating `O(n^2)` dataflow facts.
1168
+ //
1169
+ // Instead, we rely on a little trick: for an `S&T active` predicate to
1170
+ // start holding, there has to be at least one of:
1171
+ // A) a point in the gen-set of `S active` where `T` is *active*,
1172
+ // B) a point in the gen-set of `T active` where `S` is *active*, or
1173
+ // C) a point in the gen-set of both `S active` and `T active`.
1174
+ //
1175
+ // Of course, the `S&T active` predicate can be propagated further, but
1176
+ // its holding at a single point is enough for us to mark an edge on the
1177
+ // interference graph. So that's what we do.
1178
+ if (!First->overlaps (*SecondS) && !FirstS->overlaps (*Second)) {
1092
1179
Changed = true ;
1093
1180
First->MergeSegmentsInAsValue (*Second, First->getValNumInfo (0 ));
1181
+ FirstS->MergeSegmentsInAsValue (*SecondS, FirstS->getValNumInfo (0 ));
1094
1182
SlotRemap[SecondSlot] = FirstSlot;
1095
1183
SortedSlots[J] = -1 ;
1096
1184
DEBUG (dbgs ()<<" Merging #" <<FirstSlot<<" and slots #" <<
0 commit comments