Skip to content

Commit 0bff1f3

Browse files
Daniel Poetzldanpoe
Daniel Poetzl
authored andcommitted
Documentation for cfg dominator class
1 parent 026bd5b commit 0bff1f3

File tree

1 file changed

+116
-39
lines changed

1 file changed

+116
-39
lines changed

src/analyses/cfg_dominators.h

Lines changed: 116 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ Author: Georg Weissenbacher, [email protected]
2828
#include <goto-programs/goto_program.h>
2929
#include <goto-programs/cfg.h>
3030

31+
/// Dominator tree
32+
/// Node indices are 0-based here (unlike in the internal data structures of
33+
/// the algorithm).
3134
template <typename T, typename node_indext>
3235
struct dominators_datat
3336
{
@@ -41,13 +44,16 @@ struct dominators_datat
4144
: data(data), immediate_dominator(immediate_dominator)
4245
{
4346
}
47+
48+
/// Maps node to T
4449
std::vector<T> data;
50+
51+
/// Maps node to its immediate dominator
4552
std::vector<node_indext> immediate_dominator;
4653
};
4754

48-
/// An immutable set of dominators. Constant memory usage and creation time,
49-
/// but linear lookup time
50-
/// Immutability is necessary because the structure uses sharing
55+
/// An immutable set of dominators. Constant memory usage and creation time.
56+
/// Immutability is necessary because the structure uses sharing.
5157
template <typename T, typename node_indext>
5258
class dominatorst
5359
{
@@ -62,8 +68,8 @@ class dominatorst
6268

6369
public:
6470
/// Create an empty set
65-
/// Note: Only unreachable nodes should be assigned
66-
/// empty sets after the algorithm completes
71+
/// Note: Only unreachable nodes should be assigned empty sets after the
72+
/// algorithm completes
6773
dominatorst() : dominators_data(nullptr), node_index(npos), cached_distance(0)
6874
{
6975
}
@@ -180,26 +186,23 @@ class dominatorst
180186
/// Return an iterator node if node is in this dominators set, end() otherwise
181187
/// Note: O(n), when making many queries against the same set it is probably
182188
/// worth copying into a std::set
183-
184189
const_iterator find(const T &node) const
185190
{
186191
std::less<T> less;
187-
// FIXME This works around a bug in other parts of the code
188-
// in particular, dependence_graph.cpp,
189-
// where iterators to different lists than those that are
190-
// stored in this set are passed to find.
191-
// The Debug libstdc++ will (correctly!) run into an assertion failure
192-
// using std::find. std::less for some reason doesn't trigger this assertion
193-
// failure, so we use this as an ugly workaround until that code is fixed.
192+
// FIXME This works around a bug in other parts of the code in particular,
193+
// dependence_graph.cpp, where iterators to different lists than those that
194+
// are stored in this set are passed to find. The Debug libstdc++ will
195+
// (correctly!) run into an assertion failure using std::find. std::less for
196+
// some reason doesn't trigger this assertion failure, so we use this as an
197+
// ugly workaround until that code is fixed.
194198

195199
// NOLINTNEXTLINE
196200
return std::find_if(cbegin(), cend(), [&](const T &other_node) {
197201
return !less(node, other_node) && !less(other_node, node);
198202
});
199203
}
200204

201-
/// The size of the set; Linear time on the first call,
202-
/// constant after that
205+
/// The size of the set. Linear time on the first call, constant after that.
203206
std::size_t size() const
204207
{
205208
if(cached_distance == npos)
@@ -219,6 +222,7 @@ template <typename T, typename node_indext>
219222
const node_indext
220223
dominatorst<T, node_indext>::npos = std::numeric_limits<node_indext>::max();
221224

225+
/// Dominators for each instruction in a goto program
222226
template <class P, class T, bool post_dom>
223227
class cfg_dominators_templatet
224228
{
@@ -248,15 +252,17 @@ class cfg_dominators_templatet
248252
explicit fixedpointt(cfg_dominators_templatet &cfg_dominators)
249253
: cfg_dominators(cfg_dominators),
250254
dfs_counter(0),
255+
// Data structures have size cfg.size() + 1 as node indices are 1-based
256+
// to match the paper of Lengauer/Tarjan.
251257
parent(cfg_dominators.cfg.size() + 1),
252-
ancestor(cfg_dominators.cfg.size() + 1),
253-
child(cfg_dominators.cfg.size() + 1),
254258
vertex(cfg_dominators.cfg.size() + 1),
255259
dom(cfg_dominators.cfg.size() + 1),
256-
label(cfg_dominators.cfg.size() + 1),
257260
semi(cfg_dominators.cfg.size() + 1),
261+
bucket(cfg_dominators.cfg.size() + 1),
262+
ancestor(cfg_dominators.cfg.size() + 1),
263+
label(cfg_dominators.cfg.size() + 1),
258264
size(cfg_dominators.cfg.size() + 1),
259-
bucket(cfg_dominators.cfg.size() + 1)
265+
child(cfg_dominators.cfg.size() + 1)
260266
{
261267
}
262268

@@ -265,12 +271,34 @@ class cfg_dominators_templatet
265271
private:
266272
cfg_dominators_templatet &cfg_dominators;
267273
node_indext dfs_counter;
268-
std::vector<node_indext> parent, ancestor, child, vertex, dom;
269274

270-
std::vector<node_indext> label, semi, size;
275+
/// Maps node to its parent in the DFS-generated spanning tree
276+
std::vector<node_indext> parent;
277+
278+
/// Maps number to node (according to the DFS numbering)
279+
std::vector<node_indext> vertex;
280+
281+
/// Maps node to its immediate dominator
282+
std::vector<node_indext> dom;
271283

284+
/// Maps node to its semi-dominator (as defined by Lengauer/Tarjan)
285+
/// A semidominator of a node w is the minimum node v (according to the DFS
286+
/// numbering) for which there is a path from v to w such that all nodes
287+
/// occuring on that path (other than v, w) have a larger number than w
288+
/// (according to the DFS numbering)
289+
std::vector<node_indext> semi;
290+
291+
/// Maps node to the set of nodes of which it is the semi-dominator
272292
std::vector<std::set<node_indext>> bucket;
273293

294+
// Used by link() and eval(), which are used to create and query
295+
// an auxiliary data structure which is a forest that is contained
296+
// in the DFS spanning tree.
297+
std::vector<node_indext> ancestor;
298+
std::vector<node_indext> label;
299+
std::vector<node_indext> size;
300+
std::vector<node_indext> child;
301+
274302
T get_entry_node(P &program)
275303
{
276304
if(post_dom)
@@ -283,6 +311,9 @@ class cfg_dominators_templatet
283311
}
284312
};
285313

314+
/// DFS numbering
315+
/// Number nodes in the order in which they are reached during a DFS,
316+
/// intialize data structures
286317
void dfs(node_indext root)
287318
{
288319
struct dfs_statet
@@ -299,16 +330,20 @@ class cfg_dominators_templatet
299330
node_indext v = state.current;
300331
if(semi[v] == 0)
301332
{
333+
// Initialize data structures
302334
parent[v] = state.parent;
303335
semi[v] = ++dfs_counter;
304336
vertex[dfs_counter] = label[v] = v;
305337
ancestor[v] = child[v] = 0;
306338
size[v] = 1;
339+
// Explore children
307340
for_each_successor(v, [&](node_indext w) { work.push({v, w}); });
308341
}
309342
}
310343
}
311344

345+
/// Compress path from v to the root in the tree of the forest that contains
346+
/// v, by directly attaching nodes to the root
312347
void compress(node_indext v)
313348
{
314349
if(ancestor[ancestor[v]] != 0)
@@ -322,6 +357,8 @@ class cfg_dominators_templatet
322357
}
323358
}
324359

360+
/// Return node with minimum semidominator on the path from the root of the
361+
/// tree in the forest containing v to v, and compress path
325362
node_indext eval(node_indext v)
326363
{
327364
if(ancestor[v] == 0)
@@ -336,6 +373,9 @@ class cfg_dominators_templatet
336373
return label[ancestor[v]];
337374
}
338375

376+
/// Add an edge to the forest
377+
/// \param v: source node of edge
378+
/// \param w: target node of edge
339379
void link(node_indext v, node_indext w)
340380
{
341381
node_indext s = w;
@@ -365,15 +405,19 @@ class cfg_dominators_templatet
365405
}
366406
}
367407

408+
/// Fill output data structures
368409
void assign_dominators(node_indext root)
369410
{
411+
// Fill dominator tree output data structure
370412
auto dominators_data = std::make_shared<typename target_sett::datat>(
371413
cfg_dominators.cfg.size());
372414
for(node_indext i = 0; i < cfg_dominators.cfg.size(); ++i)
373415
{
374416
dominators_data->immediate_dominator[i] = dom[i + 1] - 1;
375417
dominators_data->data[i] = cfg_dominators.cfg[i].PC;
376418
}
419+
420+
// Assign immediate dominator to nodes in the cfg
377421
std::stack<node_indext> work;
378422
work.push(root);
379423
while(!work.empty())
@@ -389,10 +433,11 @@ class cfg_dominators_templatet
389433
}
390434
}
391435

436+
/// Perform action on each child node
392437
template <typename Action>
393438
void for_each_successor(node_indext node_index, Action action)
394439
{
395-
// the -1 / +1 adjusts indices from 1 based to 0 based and back
440+
// The -1 / +1 adjusts indices from 1 based to 0 based and back
396441
auto ix = node_index - 1;
397442
for(auto const &next :
398443
post_dom ? cfg_dominators.cfg.in(ix) : cfg_dominators.cfg.out(ix))
@@ -401,6 +446,7 @@ class cfg_dominators_templatet
401446
}
402447
}
403448

449+
/// Perform action on each parent node
404450
template <typename Action>
405451
void for_each_predecessor(node_indext node_index, Action action)
406452
{
@@ -414,38 +460,53 @@ class cfg_dominators_templatet
414460
};
415461
};
416462

463+
/// Dominator tree computation
464+
/// Follows "A fast algorithm for finding dominators in a flow graph" of
465+
/// Lengauer and Tarjan. Node indices are 1-based as in the paper, with the
466+
/// first element (with index 0) of each data structure simply left empty.
417467
template <class P, class T, bool post_dom>
418468
void cfg_dominators_templatet<P, T, post_dom>::fixedpointt::fixedpoint(
419469
P &program)
420470
{
421-
// Dominator Tree according to Lengauer and Tarjan
422-
// "A fast algorithm for finding dominators in a flow graph"
423-
// This is ununderstandable without reading the paper!
424-
// assumption: Vertex indices >= 0 and < cfg.size()
471+
// The nodes in the cfg data structure are represented by indices >= 0 and <
472+
// cfg.size(), whereas the internal data structures of the algorithm use
473+
// 1-based indices to represent nodes
425474
if(cfg_dominators.cfg.nodes_empty(program))
426475
{
427476
return;
428477
}
429478
cfg_dominators.entry_node = get_entry_node(program);
430479
node_indext root =
431480
cfg_dominators.cfg.entry_map[cfg_dominators.entry_node] + 1;
481+
482+
// The computation is carried out in four steps as given in the paper.
483+
484+
// Step 1
485+
// Number nodes in the order in which they are reached during DFS, and
486+
// initialize data structures
432487
dfs_counter = 0;
433488
dfs(root);
489+
434490
for(node_indext i = dfs_counter; i >= 2; --i)
435491
{
492+
// Step 2
493+
// Compute semidominators
436494
node_indext w = vertex[i];
437495
// NOLINTNEXTLINE
438496
for_each_predecessor(w, [&](node_indext v) {
439497
node_indext u = eval(v);
440-
// reachable nodes may have unreachable
441-
// nodes as their parents
498+
// Reachable nodes may have unreachable nodes as their parents
442499
if(semi[u] != 0 && semi[u] < semi[w])
443500
{
444501
semi[w] = semi[u];
445502
}
446503
});
504+
447505
bucket[vertex[semi[w]]].insert(w);
448506
link(parent[w], w);
507+
508+
// Step 3
509+
// Implicitely define immediate dominator
449510
auto &w_parent_bucket = bucket[parent[w]];
450511
for(auto v_it = begin(w_parent_bucket); v_it != end(w_parent_bucket);)
451512
{
@@ -462,6 +523,9 @@ void cfg_dominators_templatet<P, T, post_dom>::fixedpointt::fixedpoint(
462523
}
463524
}
464525
}
526+
527+
// Step 4
528+
// Compute immediate dominator
465529
for(node_indext i = 2; i <= dfs_counter; ++i)
466530
{
467531
node_indext w = vertex[i];
@@ -471,10 +535,14 @@ void cfg_dominators_templatet<P, T, post_dom>::fixedpointt::fixedpoint(
471535
}
472536
}
473537

538+
// Fill output data structures
474539
assign_dominators(root);
475540
}
476541

477542
/// Print the result of the dominator computation
543+
/// \param out: output stream
544+
/// \param cfg_dominators: structure containing the result of the dominator
545+
/// computation
478546
template <class P, class T, bool post_dom>
479547
std::ostream &operator<<(
480548
std::ostream &out,
@@ -500,23 +568,40 @@ void cfg_dominators_templatet<P, T, post_dom>::initialise(P &program)
500568
cfg(program);
501569
}
502570

503-
/// Pretty-print a single node in the dominator tree. Supply a specialisation if
504-
/// operator<< is not sufficient.
505-
/// \par parameters: `node` to print and stream `out` to pretty-print it to
571+
/// Pretty-print a single node. Supply a specialisation if operator<< is not
572+
/// sufficient.
573+
/// \param node: node to print
574+
/// \param out: output stream
506575
template <class T>
507576
void dominators_pretty_print_node(const T &node, std::ostream &out)
508577
{
509578
out << node;
510579
}
511580

581+
/// Pretty-print a single node.
582+
/// \param node: node to print
583+
/// \param out: output stream
584+
template <>
512585
inline void dominators_pretty_print_node(
513586
const goto_programt::targett &target,
514587
std::ostream &out)
515588
{
516589
out << target->code.pretty();
517590
}
518591

592+
/// Pretty-print a single node.
593+
/// \param node: node to print
594+
/// \param out: output stream
595+
template <>
596+
inline void dominators_pretty_print_node(
597+
const goto_programt::const_targett &node,
598+
std::ostream &out)
599+
{
600+
out << node->location_number;
601+
}
602+
519603
/// Print the result of the dominator computation
604+
/// \param out: output stream
520605
template <class P, class T, bool post_dom>
521606
void cfg_dominators_templatet<P, T, post_dom>::output(std::ostream &out) const
522607
{
@@ -551,12 +636,4 @@ typedef cfg_dominators_templatet<const goto_programt,
551636
true>
552637
cfg_post_dominatorst;
553638

554-
template <>
555-
inline void dominators_pretty_print_node(
556-
const goto_programt::const_targett &node,
557-
std::ostream &out)
558-
{
559-
out << node->location_number;
560-
}
561-
562639
#endif // CPROVER_ANALYSES_CFG_DOMINATORS_H

0 commit comments

Comments
 (0)