@@ -54,17 +54,18 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing)
54
54
return (bb, nothing )
55
55
end
56
56
57
- # Construct Dom Tree
58
- # Simple algorithm - TODO : Switch to the fast version (e.g. https://tanujkhattar.wordpress.com/2016/01/11/dominator-tree-of-a-directed-graph/)
59
- function construct_domtree (cfg:: CFG )
57
+ function naive_idoms (cfg:: CFG )
60
58
nblocks = length (cfg. blocks)
61
59
dom_all = BitSet (1 : nblocks)
62
60
dominators = BitSet[n == 1 ? BitSet (1 ) : copy (dom_all) for n = 1 : nblocks]
63
61
changed = true
64
62
while changed
65
63
changed = false
66
64
for n = 2 : nblocks
67
- isempty (cfg. blocks[n]. preds) && continue
65
+ if isempty (cfg. blocks[n]. preds)
66
+ isempty (dominators[n]) || (dominators[n] = BitSet ())
67
+ continue
68
+ end
68
69
firstp, rest = Iterators. peel (Iterators. filter (p-> p != 0 , cfg. blocks[n]. preds))
69
70
new_doms = copy (dominators[firstp])
70
71
for p in rest
@@ -91,7 +92,16 @@ function construct_domtree(cfg::CFG)
91
92
idoms[i] = dom
92
93
end
93
94
end
95
+ idoms
96
+ end
97
+
98
+ # Construct Dom Tree
99
+ function construct_domtree (cfg:: CFG )
100
+ idoms = SNCA (cfg)
101
+ nidoms = naive_idoms (cfg)
102
+ @assert idoms == nidoms
94
103
# Compute children
104
+ nblocks = length (cfg. blocks)
95
105
domtree = DomTreeNode[DomTreeNode () for _ = 1 : nblocks]
96
106
for (idx, idom) in Iterators. enumerate (idoms)
97
107
(idx == 1 || idom == 0 ) && continue
@@ -101,3 +111,165 @@ function construct_domtree(cfg::CFG)
101
111
update_level! (domtree, 1 , 1 )
102
112
DomTree (idoms, domtree)
103
113
end
114
+
115
+ #= =============================== [SNCA] ======================================#
116
+ #
117
+ # This section implements the Semi-NCA (SNCA) dominator tree construction
118
+ # described in Georgiadis' PhD thesis [LG05], which itself is a simplification
119
+ # of the Simple Lengauer-Tarjan (SLT) algorithm [LT79]. This algorithm matches
120
+ # the algorithm choice in LLVM and seems to be a sweet spot in implementation
121
+ # simplicity and efficiency.
122
+ #
123
+ # [LG05] Linear-Time Algorithms for Dominators and Related Problems
124
+ # Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
125
+ # ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
126
+ #
127
+ # [LT79] A fast algorithm for finding dominators in a flowgraph
128
+ # Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1
129
+ # http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf
130
+ #
131
+ begin
132
# Basic-block numbers and DFS numbers are both plain unsigned integers. Wrapping
# them in dedicated structs was judged not worth the overhead; the aliases exist
# only so that signatures document which numbering an index belongs to.
const BBNumber = UInt
const DFSNumber = UInt

"""
    Node

Per-block working state of the Semi-NCA algorithm.

The original formulation keeps separate length-`n` arrays (`label`, `semi` and
`ancestor`). Here `semi` and `label` of one block are stored side by side in a
single `Node`, so that a `Vector{Node}` acts as an array of structs.
"""
struct Node
    # `semi` value of this block, expressed as a DFS number
    semi::DFSNumber
    # `label` value of this block, expressed as a DFS number
    label::DFSNumber
end
147
+
148
"""
    DFSTree

Result of a depth-first traversal of the CFG, stored as three parallel index
tables.
"""
struct DFSTree
    # numbering[d] is the BB number of the block that received DFS number `d`
    numbering::Vector{BBNumber}
    # reverse[b] is the DFS number of basic block `b`
    reverse::Vector{DFSNumber}
    # parents[d] is the DFS number of the DFS-tree parent of the block with DFS
    # number `d`. Keeping this table DFS-indexed on both sides saves a few
    # lookups in snca_compress!
    parents::Vector{DFSNumber}
end
157
# Number of entries in the `numbering` table of `D`.
function length(D::DFSTree)
    return length(D.numbering)
end
158
# The DFS numbers of `D` in increasing order, i.e. the range 1:length(D).
function preorder(D::DFSTree)
    return OneTo(length(D))
end
159
# Return the unit range `xs` with its first `n` elements removed. The result is
# an empty range when `xs` has no more than `n` elements.
function _drop(xs::AbstractUnitRange, n::Integer)
    start = first(xs) + n
    return start:last(xs)
end
160
+
161
"""
    DFSTree(nblocks::Int)

Allocate a `DFSTree` with room for `nblocks` blocks. The `reverse` table is
zero-filled; `numbering` and `parents` are left uninitialized and must be
written before they are read.
"""
function DFSTree(nblocks::Int)
    dfs_to_bb = Vector{BBNumber}(undef, nblocks)
    bb_to_dfs = zeros(DFSNumber, nblocks)
    dfs_parents = Vector{DFSNumber}(undef, nblocks)
    return DFSTree(dfs_to_bb, bb_to_dfs, dfs_parents)
end
167
+
168
"""
    DFS(cfg::CFG, current_node::BBNumber)::DFSTree

Run an iterative depth-first search over the successor edges of `cfg`, starting
at basic block `current_node`, and return the resulting `DFSTree`.

Blocks receive DFS numbers `1, 2, ...` in the order in which they are visited.
The start block gets parent `0`. Blocks that are never reached keep a `reverse`
entry of `0`, and the `numbering` and `parents` tables are shrunk to the number
of blocks that were actually visited.
"""
function DFS(cfg::CFG, current_node::BBNumber)::DFSTree
    dfs = DFSTree(length(cfg.blocks))
    # Explicit stack of (DFS number of the discovering block, BB number).
    # A block may sit on the stack several times (once for every visited
    # predecessor that saw it unvisited), so the stack is bounded by the number
    # of edges, not by the number of blocks.
    worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)]
    dfs_num = 1
    while !isempty(worklist)
        (parent, current_node) = pop!(worklist)
        # Blocks are marked when they are *visited*, not when they are pushed;
        # a stale entry for an already visited block is simply dropped.
        # Marking at push time records the first block that saw `w` as its
        # parent even when the search reaches `w` earlier through another
        # block, which is not a DFS tree. Example (successors in this order):
        # 1→q, 1→p, p→w, p→v, q→v, v→w. Marking at push time gives `w` the
        # parent `p` although `v` is visited in between and has an edge to `w`;
        # Semi-NCA then reports `p` instead of block 1 as the idom of `w`.
        dfs.reverse[current_node] != 0 && continue
        dfs.reverse[current_node] = dfs_num
        dfs.numbering[dfs_num] = current_node
        dfs.parents[dfs_num] = parent
        for succ in cfg.blocks[current_node].succs
            # Already numbered: nothing to do. Unnumbered successors are pushed
            # unconditionally, the check above de-duplicates them on pop.
            dfs.reverse[succ] != 0 && continue
            push!(worklist, (dfs_num, succ))
        end
        dfs_num += 1
    end
    # If all blocks are reachable, this is a no-op, otherwise,
    # we shrink these arrays.
    resize!(dfs.numbering, dfs_num - 1)
    resize!(dfs.parents, dfs_num - 1)
    return dfs
end
195
+
196
+ """
197
+ Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the
198
+ modification suggested in the paper to use `last_linked` to determine
199
+ whether an ancestor has been processed rather than storing `0` in the
200
+ ancestor array.
201
+ """
202
function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber},
                        v::DFSNumber, last_linked::DFSNumber)
    # All indices here are DFS numbers. `anc` is the current ancestor of `v`.
    anc = ancestors[v]
    @assert anc < v
    # An ancestor numbered below `last_linked` has not been processed yet, so
    # there is nothing to compress along this edge.
    anc >= last_linked || return nothing
    # Compress the path above `anc` first, so that its label and ancestor
    # entry are up to date before they are folded into `v`.
    snca_compress!(state, ancestors, anc, last_linked)
    anc_label = state[anc].label
    if anc_label < state[v].label
        # Keep the smaller label; `semi` of `v` is left untouched.
        state[v] = Node(state[v].semi, anc_label)
    end
    # Skip over `anc` on subsequent calls.
    ancestors[v] = ancestors[anc]
    return nothing
end
215
+
216
+ """
217
+ The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05].
218
+ Note that the pseudocode in [LG05] is not entirely accurate.
219
+ The best way to understand what's happening is to read [LT79], then the
220
+ description of SLT in [LG05] (warning: inconsistent notation), then
221
+ the description of Semi-NCA.
222
+ """
223
function SNCA(cfg::CFG)
    # Returns a Vector{Int} indexed by BB number that holds the BB number of
    # each block's immediate dominator. The entry is 0 for block 1 and for
    # every block the DFS did not reach.
    tree = DFS(cfg, BBNumber(1))
    order = preorder(tree)
    # Every `label` starts out as the identity mapping (the paper does not make
    # that clear); the rationale is Lemma 2.4 in [LG05]. The original comment
    # also pointed at a "Theorem 4" whose citation is missing, presumably
    # [LT79] (TODO confirm). `semi` is never read before the first loop below
    # has overwritten it, so its initial value is only a placeholder.
    nodes = Node[Node(typemax(DFSNumber), n) for n in order]
    # Immediate dominators start out as the DFS parents. They stay DFS-indexed
    # until the final translation pass to avoid extra memory references in the
    # loops below.
    idom_of = copy(tree.parents)
    # The parents table doubles as the ancestors table: Semi-NCA itself never
    # looks at the parents again.
    ancestors = tree.parents

    # Pass 1: labels in reverse preorder, skipping the root.
    for w in reverse(_drop(order, 1))
        # Predecessors numbered at or above this bound have been processed.
        last_linked = DFSNumber(w + 1)
        # LLVM starts from the parent and the paper from `w`; since the parent
        # is itself a predecessor it is found in the loop anyway.
        best = typemax(DFSNumber)
        for pred_bb in cfg.blocks[tree.numbering[w]].preds
            # A predecessor of 0 is a virtual edge into a catch block and is
            # ignored for the purpose of the dominator tree.
            if pred_bb != 0
                pred_dfs = tree.reverse[pred_bb]
                # A DFS number of 0 means the predecessor is unreachable.
                if pred_dfs != 0
                    # This guard is absent from the pseudocode in figure 2.8 of
                    # [LG05]; it plays the role of the `ancestor[v] != 0` check
                    # in the `eval` implementation of figure 2.6.
                    if pred_dfs >= last_linked
                        snca_compress!(nodes, ancestors, pred_dfs, last_linked)
                    end
                    best = min(best, nodes[pred_dfs].label)
                end
            end
        end
        nodes[w] = Node(best, best)
    end

    # Pass 2: in preorder, walk each block's candidate up the partially built
    # dominator chain until it is no larger than the block's `semi`.
    for v in _drop(order, 1)
        candidate = idom_of[v]
        bound = nodes[v].semi
        while candidate > bound
            candidate = idom_of[candidate]
        end
        idom_of[v] = candidate
    end

    # Pass 3: translate from DFS numbers back to BB numbers.
    nblocks = length(cfg.blocks)
    idoms_bb = zeros(Int, nblocks)
    for bb in 2:nblocks
        bb_dfs = tree.reverse[bb]
        bb_dfs == 0 && continue
        idoms_bb[bb] = tree.numbering[idom_of[bb_dfs]]
    end
    return idoms_bb
end
275
+ end
0 commit comments