@@ -197,7 +197,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
197
197
}
198
198
199
199
void createAdjacencyStructure (SwingSchedulerDAG *DAG);
200
- bool circuit (int V, int S, NodeSetType &NodeSets, bool HasBackedge = false );
200
+ bool circuit (int V, int S, NodeSetType &NodeSets,
201
+ const SwingSchedulerDAG *DAG, bool HasBackedge = false );
201
202
void unblock (int U);
202
203
};
203
204
@@ -260,7 +261,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
260
261
return Source->getInstr ()->isPHI () || Dep.getSUnit ()->getInstr ()->isPHI ();
261
262
}
262
263
263
- bool isLoopCarriedDep (SUnit *Source, const SDep &Dep, bool isSucc = true );
264
+ bool isLoopCarriedDep (SUnit *Source, const SDep &Dep,
265
+ bool isSucc = true ) const ;
264
266
265
267
/// The distance function, which indicates that operation V of iteration I
266
268
/// depends on operations U of iteration I-distance.
@@ -311,7 +313,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
311
313
void computeNodeOrder (NodeSetType &NodeSets);
312
314
void checkValidNodeOrder (const NodeSetType &Circuits) const ;
313
315
bool schedulePipeline (SMSchedule &Schedule);
314
- bool computeDelta (MachineInstr &MI, unsigned &Delta);
316
+ bool computeDelta (MachineInstr &MI, unsigned &Delta) const ;
315
317
MachineInstr *findDefInLoop (Register Reg);
316
318
bool canUseLastOffsetValue (MachineInstr *MI, unsigned &BasePos,
317
319
unsigned &OffsetPos, unsigned &NewBase,
@@ -339,24 +341,56 @@ class NodeSet {
339
341
using iterator = SetVector<SUnit *>::const_iterator;
340
342
341
343
NodeSet () = default ;
342
- NodeSet (iterator S, iterator E) : Nodes(S, E), HasRecurrence(true ) {
343
- Latency = 0 ;
344
- for (const SUnit *Node : Nodes) {
345
- DenseMap<SUnit *, unsigned > SuccSUnitLatency;
346
- for (const SDep &Succ : Node->Succs ) {
347
- auto SuccSUnit = Succ.getSUnit ();
348
- if (!Nodes.count (SuccSUnit))
344
+ NodeSet (iterator S, iterator E, const SwingSchedulerDAG *DAG)
345
+ : Nodes(S, E), HasRecurrence(true ) {
346
+ // Calculate the latency of this node set.
347
+ // Example to demonstrate the calculation:
348
+ // Given: N0 -> N1 -> N2 -> N0
349
+ // Edges:
350
+ // (N0 -> N1, 3)
351
+ // (N0 -> N1, 5)
352
+ // (N1 -> N2, 2)
353
+ // (N2 -> N0, 1)
354
+ // The total latency which is a lower bound of the recurrence MII is the
355
+ // longest path from N0 back to N0 given only the edges of this node set.
356
+ // In this example, the latency is: 5 + 2 + 1 = 8.
357
+ //
358
+ // Hold a map from each SUnit in the circle to the maximum distance from the
359
+ // source node by only considering the nodes.
360
+ DenseMap<SUnit *, unsigned > SUnitToDistance;
361
+ for (auto *Node : Nodes)
362
+ SUnitToDistance[Node] = 0 ;
363
+
364
+ for (unsigned I = 1 , E = Nodes.size (); I <= E; ++I) {
365
+ SUnit *U = Nodes[I - 1 ];
366
+ SUnit *V = Nodes[I % Nodes.size ()];
367
+ for (const SDep &Succ : U->Succs ) {
368
+ SUnit *SuccSUnit = Succ.getSUnit ();
369
+ if (V != SuccSUnit)
349
370
continue ;
350
- unsigned CurLatency = Succ.getLatency ();
351
- unsigned MaxLatency = 0 ;
352
- if (SuccSUnitLatency.count (SuccSUnit))
353
- MaxLatency = SuccSUnitLatency[SuccSUnit];
354
- if (CurLatency > MaxLatency)
355
- SuccSUnitLatency[SuccSUnit] = CurLatency;
371
+ if (SUnitToDistance[U] + Succ.getLatency () > SUnitToDistance[V]) {
372
+ SUnitToDistance[V] = SUnitToDistance[U] + Succ.getLatency ();
373
+ }
356
374
}
357
- for (auto SUnitLatency : SuccSUnitLatency)
358
- Latency += SUnitLatency.second ;
359
375
}
376
+ // Handle a back-edge in loop carried dependencies
377
+ SUnit *FirstNode = Nodes[0 ];
378
+ SUnit *LastNode = Nodes[Nodes.size () - 1 ];
379
+
380
+ for (auto &PI : LastNode->Preds ) {
381
+ // If we have an order dep that is potentially loop carried then a
382
+ // back-edge exists between the last node and the first node that isn't
383
+ // modeled in the DAG. Handle it manually by adding 1 to the distance of
384
+ // the last node.
385
+ if (PI.getSUnit () != FirstNode || PI.getKind () != SDep::Order ||
386
+ !DAG->isLoopCarriedDep (LastNode, PI, false ))
387
+ continue ;
388
+ SUnitToDistance[FirstNode] =
389
+ std::max (SUnitToDistance[FirstNode], SUnitToDistance[LastNode] + 1 );
390
+ }
391
+
392
+ // The latency is the distance from the source node to itself.
393
+ Latency = SUnitToDistance[Nodes.front ()];
360
394
}
361
395
362
396
/// Forwards \p SU to Nodes.insert() and returns that call's result as a bool.
bool insert(SUnit *SU) {
  const bool Inserted = Nodes.insert(SU);
  return Inserted;
}
0 commit comments