Skip to content

Commit addfc97

Browse files
committed
WIP: Better dominators
This commit replaces the naive algorithm for constructing dominator trees with a faster implementation based on the Semi-NCA algorithm (reference in the code comments). LLVM recently switched to this algorithm and found it to be faster in practice than SLT (which it used before). It is also slightly easier to implement. More importantly though, it should easily extend to dynamic dominators. I'm hoping this will fix the performance problems with constructing dominators noted in #25927 as well as provide the basis for a dynamic dominator implementation to fix #29107.
1 parent 3b428c9 commit addfc97

File tree

1 file changed

+176
-4
lines changed

1 file changed

+176
-4
lines changed

base/compiler/ssair/domtree.jl

Lines changed: 176 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,17 +54,18 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing)
5454
return (bb, nothing)
5555
end
5656

57-
# Construct Dom Tree
58-
# Simple algorithm - TODO: Switch to the fast version (e.g. https://tanujkhattar.wordpress.com/2016/01/11/dominator-tree-of-a-directed-graph/)
59-
function construct_domtree(cfg::CFG)
57+
function naive_idoms(cfg::CFG)
6058
nblocks = length(cfg.blocks)
6159
dom_all = BitSet(1:nblocks)
6260
dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks]
6361
changed = true
6462
while changed
6563
changed = false
6664
for n = 2:nblocks
67-
isempty(cfg.blocks[n].preds) && continue
65+
if isempty(cfg.blocks[n].preds)
66+
isempty(dominators[n]) || (dominators[n] = BitSet())
67+
continue
68+
end
6869
firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, cfg.blocks[n].preds))
6970
new_doms = copy(dominators[firstp])
7071
for p in rest
@@ -91,7 +92,16 @@ function construct_domtree(cfg::CFG)
9192
idoms[i] = dom
9293
end
9394
end
95+
idoms
96+
end
97+
98+
# Construct Dom Tree
99+
function construct_domtree(cfg::CFG)
100+
idoms = SNCA(cfg)
101+
nidoms = naive_idoms(cfg)
102+
@assert idoms == nidoms
94103
# Compute children
104+
nblocks = length(cfg.blocks)
95105
domtree = DomTreeNode[DomTreeNode() for _ = 1:nblocks]
96106
for (idx, idom) in Iterators.enumerate(idoms)
97107
(idx == 1 || idom == 0) && continue
@@ -101,3 +111,165 @@ function construct_domtree(cfg::CFG)
101111
update_level!(domtree, 1, 1)
102112
DomTree(idoms, domtree)
103113
end
114+
115+
#================================ [SNCA] ======================================#
116+
#
117+
# This section implements the Semi-NCA (SNCA) dominator tree construction
118+
# described in Georgiadis' PhD thesis [LG05], which itself is a simplification
119+
# of the Simple Lengauer-Tarjan (SLT) algorithm [LT79]. This algorithm matches
120+
# the algorithm choice in LLVM and seems to be a sweet spot in implementation
121+
# simplicity and efficiency.
122+
#
123+
# [LG05] Linear-Time Algorithms for Dominators and Related Problems
124+
# Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
125+
# ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
126+
#
127+
# [LT79] A fast algorithm for finding dominators in a flowgraph
128+
# Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1
129+
# http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf
130+
#
131+
begin
132+
# We could make these real structs, but probably not worth the extra
133+
# overhead. Still, give them names for documentary purposes.
134+
const BBNumber = UInt
135+
const DFSNumber = UInt
136+
137+
"""
138+
Keeps the per-BB state of the Semi NCA algorithm. In the original
139+
formulation, there are three separate length `n` arrays, `label`, `semi` and
140+
`ancestor`. Instead, for efficiency, we use one array in a array-of-structs
141+
style setup.
142+
"""
143+
struct Node
144+
semi::DFSNumber
145+
label::DFSNumber
146+
end
147+
148+
struct DFSTree
149+
# Maps DFS number to BB number
150+
numbering::Vector{BBNumber}
151+
# Maps BB number to DFS number
152+
reverse::Vector{DFSNumber}
153+
# Records parent relationships in the DFS tree (DFS number -> DFS number)
154+
# Storing it this way saves a few lookups in the snca_compress! algorithm
155+
parents::Vector{DFSNumber}
156+
end
157+
length(D::DFSTree) = length(D.numbering)
158+
preorder(D::DFSTree) = OneTo(length(D))
159+
_drop(xs::AbstractUnitRange, n::Integer) = (first(xs)+n):last(xs)
160+
161+
function DFSTree(nblocks::Int)
162+
DFSTree(
163+
Vector{BBNumber}(undef, nblocks),
164+
zeros(DFSNumber, nblocks),
165+
Vector{DFSNumber}(undef, nblocks))
166+
end
167+
168+
function DFS(cfg::CFG, current_node::BBNumber)::DFSTree
169+
dfs = DFSTree(length(cfg.blocks))
170+
# TODO: We could reuse the storage in DFSTree for our worklist. We're
171+
# guaranteed for the worklist to be smaller than the remaining space in
172+
# DFSTree
173+
worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)]
174+
dfs_num = 1
175+
parent = 0
176+
while !isempty(worklist)
177+
(parent, current_node) = pop!(worklist)
178+
dfs.reverse[current_node] = dfs_num
179+
dfs.numbering[dfs_num] = current_node
180+
dfs.parents[dfs_num] = parent
181+
for succ in cfg.blocks[current_node].succs
182+
dfs.reverse[succ] != 0 && continue
183+
# Mark things that are currently in the worklist
184+
dfs.reverse[succ] = 1
185+
push!(worklist, (dfs_num, succ))
186+
end
187+
dfs_num += 1
188+
end
189+
# If all blocks are reachable, this is a no-op, otherwise,
190+
# we shrink these arrays.
191+
resize!(dfs.numbering, dfs_num - 1)
192+
resize!(dfs.parents, dfs_num - 1)
193+
dfs
194+
end
195+
196+
"""
197+
Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the
198+
modification suggested in the paper to use `last_linked` to determine
199+
whether an ancestor has been processed rather than storing `0` in the
200+
ancestor array.
201+
"""
202+
function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber},
203+
v::DFSNumber, last_linked::DFSNumber)
204+
u = ancestors[v]
205+
@assert u < v
206+
if u >= last_linked
207+
snca_compress!(state, ancestors, u, last_linked)
208+
if state[u].label < state[v].label
209+
state[v] = Node(state[v].semi, state[u].label)
210+
end
211+
ancestors[v] = ancestors[u]
212+
end
213+
nothing
214+
end
215+
216+
"""
217+
The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05].
218+
Note that the pseudocode in [LG05] is not entirely accurate.
219+
The best way to understand what's happening is to read [LT79], then the
220+
description of SLT in in [LG05] (warning: inconsistent notation), then
221+
the description of Semi-NCA.
222+
"""
223+
function SNCA(cfg::CFG)
224+
D = DFS(cfg, BBNumber(1))
225+
# `label` is initialized to the identity mapping (though
226+
# the paper doesn't make that clear). The rational for this is Lemma
227+
# 2.4 in [LG05] (i.e. Theorem 4 in ). Note however, that we don't
228+
# ever look at `semi` until it is fully initialized, so we could leave
229+
# it unitialized here if we wanted to.
230+
state = Node[ Node(typemax(DFSNumber), w) for w in preorder(D) ]
231+
# Initialize idoms to parents. Note that while idoms are eventually
232+
# BB indexed, we keep it DFS indexed until a final post-processing
233+
# pass to avoid extra memory references during the O(N^2) phase below.
234+
idoms_dfs = copy(D.parents)
235+
# We abuse the parents array as the ancestors array.
236+
# Semi-NCA does not look at the parents array at all.
237+
# SLT would, but never simultaneously, so we could still
238+
# do this.
239+
ancestors = D.parents
240+
for w reverse(_drop(preorder(D), 1))
241+
# LLVM initializes this to the parent, the paper initializes this to
242+
# `w`, but it doesn't really matter (the parent is a predecessor,
243+
# so at worst we'll discover it below). Save a memory reference here.
244+
semi_w = typemax(DFSNumber)
245+
for v cfg.blocks[D.numbering[w]].preds
246+
# For the purpose of the domtree, ignore virtual predecessors
247+
# into catch blocks.
248+
v == 0 && continue
249+
vdfs = D.reverse[v]
250+
# Ignore unreachable predecessors
251+
vdfs == 0 && continue
252+
last_linked = DFSNumber(w + 1)
253+
# N.B.: This conditional is missing from the psuedocode
254+
# in figure 2.8 of [LG05]. It corresponds to the
255+
# `ancestor[v] != 0` check in the `eval` implementation in
256+
# figure 2.6
257+
if vdfs >= last_linked
258+
snca_compress!(state, ancestors, vdfs, last_linked)
259+
end
260+
semi_w = min(semi_w, state[vdfs].label)
261+
end
262+
state[w] = Node(semi_w, semi_w)
263+
end
264+
for v _drop(preorder(D), 1)
265+
idom = idoms_dfs[v]
266+
vsemi = state[v].semi
267+
while idom > vsemi
268+
idom = idoms_dfs[idom]
269+
end
270+
idoms_dfs[v] = idom
271+
end
272+
idoms_bb = Int[ (i == 1 || D.reverse[i] == 0) ? 0 : D.numbering[idoms_dfs[D.reverse[i]]] for i = 1:length(cfg.blocks) ]
273+
idoms_bb
274+
end
275+
end

0 commit comments

Comments
 (0)