diff --git a/base/compiler/ssair/basicblock.jl b/base/compiler/ssair/basicblock.jl new file mode 100644 index 0000000000000..427aae707e664 --- /dev/null +++ b/base/compiler/ssair/basicblock.jl @@ -0,0 +1,32 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" +Like UnitRange{Int}, but can handle the `last` field, being temporarily +< first (this can happen during compacting) +""" +struct StmtRange <: AbstractUnitRange{Int} + start::Int + stop::Int +end + +first(r::StmtRange) = r.start +last(r::StmtRange) = r.stop +iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1) + +StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range)) + +struct BasicBlock + stmts::StmtRange + preds::Vector{Int} + succs::Vector{Int} +end + +function BasicBlock(stmts::StmtRange) + return BasicBlock(stmts, Int[], Int[]) +end + +function BasicBlock(old_bb, stmts) + return BasicBlock(stmts, old_bb.preds, old_bb.succs) +end + +copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs)) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index f9b407f9ddb3e..86e72fba7982a 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -1,63 +1,665 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# This file implements the Semi-NCA (SNCA) dominator tree construction +# described in Georgiadis' PhD thesis [LG05], which itself is a simplification +# of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches +# the algorithm choice in LLVM and seems to be a sweet spot in implementation +# simplicity and efficiency. +# +# This file also implements an extension of SNCA that supports updating the +# dominator tree with insertion and deletion of edges in the control flow +# graph, described in [GI16] as Dynamic SNCA. DSNCA was chosen over DBS, a +# different algorithm which achieves the best overall performance in [GI16], +# because it is simpler to understand and implement, performs well with edge +# deletions, and is of similar performance overall. +# +# SNCA works by first computing semidominators, then computing immediate +# dominators from them. The semidominator of a node is the node with minimum +# preorder number such that there is a semidominator path from it to the node. +# A semidominator path is a path in which the preorder numbers of all nodes not +# at the endpoints are greater than the preorder number of the last node. +# Intuitively, the semidominator approximates the immediate dominator of a node +# by taking the path (in the CFG) that gets as close to the root as possible +# while avoiding ancestors of the node in the DFS tree. +# +# In computing the semidominators, SNCA performs "path compression" whenever a +# node has a nontrivial semidominator (i.e. a semidominator that is not just +# its parent in the DFS tree). Path compression propagates the "label" of a +# node, which represents a possible semidominator with associated semidominator +# path passing through that node. +# +# For example, path compression will be performed for the following CFG, where +# the edge not in the DFS tree is marked with asterisks. Note that nodes are +# labeled with their preorder numbers, and all edges point downward. +# +# 1 +# |\ +# | \ +# | 4 +# | | +# 2 5 +# | | +# | 6 +# | * +# |* +# 3 +# +# There is a nontrivial semidominator path from 1 to 3, passing through 4, 5, +# and 6. Stepping through the whole algorithm on paper with an example like +# this is very helpful for understanding how it works. +# +# DSNCA runs the whole algorithm from scratch if the DFS tree is invalidated by +# the insertion or deletion, but otherwise recomputes a subset of the +# semidominators (all immediate dominators then need to be recomputed). +# +# [LG05] Linear-Time Algorithms for Dominators and Related Problems +# Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: +# ftp://ftp.cs.princeton.edu/reports/2005/737.pdf +# +# [LT79] A fast algorithm for finding dominators in a flowgraph +# Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1 +# http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf +# +# [GI16] An Experimental Study of Dynamic Dominators +# Loukas Georgiadis, Giuseppe F. Italiano, Luigi Laura, Federico +# Santaroni, April 2016 +# https://arxiv.org/abs/1604.02711 + +# We could make these real structs, but probably not worth the extra +# overhead. Still, give them names for documentary purposes. +const BBNumber = Int +const PreNumber = Int +const PostNumber = Int + +struct DFSTree + # These map between BB number and pre- or postorder numbers + to_pre::Vector{PreNumber} + from_pre::Vector{BBNumber} + to_post::Vector{PostNumber} + from_post::Vector{BBNumber} + + # Records parent relationships in the DFS tree + # (preorder number -> preorder number) + # Storing it this way saves a few lookups in the snca_compress! algorithm + to_parent_pre::Vector{PreNumber} +end + +function DFSTree(n_blocks::Int) + return DFSTree(zeros(PreNumber, n_blocks), + Vector{BBNumber}(undef, n_blocks), + zeros(PostNumber, n_blocks), + Vector{BBNumber}(undef, n_blocks), + zeros(PreNumber, n_blocks)) +end + +copy(D::DFSTree) = DFSTree(copy(D.to_pre), + copy(D.from_pre), + copy(D.to_post), + copy(D.from_post), + copy(D.to_parent_pre)) + +function copy!(dst::DFSTree, src::DFSTree) + copy!(dst.to_pre, src.to_pre) + copy!(dst.from_pre, src.from_pre) + copy!(dst.to_post, src.to_post) + copy!(dst.from_post, src.from_post) + copy!(dst.to_parent_pre, src.to_parent_pre) + return dst +end + +length(D::DFSTree) = length(D.from_pre) + +function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) + copy!(D, DFSTree(length(blocks))) + to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] + pre_num = 1 + post_num = 1 + while !isempty(to_visit) + # Because we want the postorder number as well as the preorder number, + # we don't pop the current node from the stack until we're moving up + # the tree + (current_node_bb, parent_pre, pushed_children) = to_visit[end] + + if pushed_children + # Going up the DFS tree, so all we need to do is record the + # postorder number, then move on + D.to_post[current_node_bb] = post_num + D.from_post[post_num] = current_node_bb + post_num += 1 + pop!(to_visit) + + elseif D.to_pre[current_node_bb] != 0 + # Node has already been visited, move on + pop!(to_visit) + continue + else + # Going down the DFS tree + + # Record preorder number + D.to_pre[current_node_bb] = pre_num + D.from_pre[pre_num] = current_node_bb + D.to_parent_pre[pre_num] = parent_pre + + # Record that children (will) have been pushed + to_visit[end] = (current_node_bb, parent_pre, true) + + # Push children to the stack + for succ_bb in blocks[current_node_bb].succs + push!(to_visit, (succ_bb, pre_num, false)) + end + + pre_num += 1 + end + end + + # If all blocks are reachable, this is a no-op, otherwise, we shrink these + # arrays. + resize!(D.from_pre, pre_num - 1) + resize!(D.from_post, post_num - 1) # should be same size as pre_num - 1 + resize!(D.to_parent_pre, pre_num - 1) + + return D +end + +DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks) + +""" +Keeps the per-BB state of the Semi NCA algorithm. In the original formulation, +there are three separate length `n` arrays, `label`, `semi` and `ancestor`. +Instead, for efficiency, we use one array in a array-of-structs style setup. +""" +struct SNCAData + semi::PreNumber + label::PreNumber +end + +copy(d::SNCAData) = SNCAData(d.semi, d.label) + "Represents a Basic Block, in the DomTree" struct DomTreeNode # How deep we are in the DomTree level::Int # The BB indices in the CFG for all Basic Blocks we immediately dominate - children::Vector{Int} + children::Vector{BBNumber} end -DomTreeNode() = DomTreeNode(1, Vector{Int}()) + +DomTreeNode() = DomTreeNode(1, Vector{BBNumber}()) + +copy(n::DomTreeNode) = DomTreeNode(n.level, copy(n.children)) "Data structure that encodes which basic block dominates which." struct DomTree - # Which basic block immediately dominates each basic block (ordered by BB indices) - # Note: this is the inverse of the nodes, children field - idoms::Vector{Int} + # These can be reused when updating domtree dynamically + dfs_tree::DFSTree + snca_state::Vector{SNCAData} + + # Which basic block immediately dominates each basic block, using BB indices + idoms_bb::Vector{BBNumber} # The nodes in the tree (ordered by BB indices) nodes::Vector{DomTreeNode} end +DomTree() = DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) + +copy(d::DomTree) = DomTree(copy(d.dfs_tree), + copy(d.snca_state), + copy(d.idoms_bb), + copy(d.nodes)) + +function construct_domtree(blocks::Vector{BasicBlock}) + return update_domtree!(blocks, DomTree(), true, 0) +end + +function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, + recompute_dfs::Bool, max_pre::PreNumber) + if recompute_dfs + DFS!(domtree.dfs_tree, blocks) + end + + if max_pre == 0 + max_pre = length(domtree.dfs_tree) + end + + SNCA!(domtree, blocks, max_pre) + compute_domtree_nodes!(domtree) + return domtree +end + +function compute_domtree_nodes!(domtree::DomTree) + # Compute children + copy!(domtree.nodes, + DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)]) + for (idx, idom) in Iterators.enumerate(domtree.idoms_bb) + (idx == 1 || idom == 0) && continue + push!(domtree.nodes[idom].children, idx) + end + # Recursively set level + update_level!(domtree.nodes, 1, 1) + return domtree.nodes +end + +function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int) + worklist = Tuple{BBNumber, Int}[(node, level)] + while !isempty(worklist) + (node, level) = pop!(worklist) + nodes[node] = DomTreeNode(level, nodes[node].children) + foreach(nodes[node].children) do child + push!(worklist, (child, level+1)) + end + end +end + +""" +The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05]. Note that the +pseudocode in [LG05] is not entirely accurate. The best way to understand +what's happening is to read [LT79], then the description of SLT in [LG05] +(warning: inconsistent notation), then the description of Semi-NCA. +""" +function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) + D = domtree.dfs_tree + state = domtree.snca_state + # There may be more blocks than are reachable in the DFS / dominator tree + n_blocks = length(blocks) + n_nodes = length(D) + + # `label` is initialized to the identity mapping (though the paper doesn't + # make that clear). The rationale for this is Lemma 2.4 in [LG05] (i.e. + # Theorem 4 in [LT79]). Note however, that we don't ever look at `semi` + # until it is fully initialized, so we could leave it uninitialized here if + # we wanted to. + resize!(state, n_nodes) + for w in 1:max_pre + # Only reset semidominators for nodes we want to recompute + state[w] = SNCAData(typemax(PreNumber), w) + end + + # If we are only recomputing some of the semidominators, the remaining + # labels should be reset, because they may have become inapplicable to the + # node/semidominator we are currently processing/recomputing. They can + # become inapplicable because of path compressions that were triggered by + # nodes that should only be processed after the current one (but were + # processed the last time `SNCA!` was run). + # + # So, for every node that is not being reprocessed, we reset its label to + # its semidominator, which is the value that its label assumes once its + # semidominator is computed. If this was too conservative, i.e. if the + # label would have been updated before we process the current node in a + # situation where all semidominators were recomputed, then path compression + # will produce the correct label. + for w in max_pre+1:n_nodes + semi = state[w].semi + state[w] = SNCAData(semi, semi) + end + + # Calculate semidominators, but only for blocks with preorder number up to + # max_pre + ancestors = copy(D.to_parent_pre) + for w::PreNumber in reverse(2:max_pre) + # LLVM initializes this to the parent, the paper initializes this to + # `w`, but it doesn't really matter (the parent is a predecessor, so at + # worst we'll discover it below). Save a memory reference here. + semi_w = typemax(PreNumber) + last_linked = PreNumber(w + 1) + for v ∈ blocks[D.from_pre[w]].preds + # For the purpose of the domtree, ignore virtual predecessors into + # catch blocks. + v == 0 && continue + + v_pre = D.to_pre[v] + + # Ignore unreachable predecessors + v_pre == 0 && continue + + # N.B.: This conditional is missing from the pseudocode in figure + # 2.8 of [LG05]. It corresponds to the `ancestor[v] != 0` check in + # the `eval` implementation in figure 2.6 + if v_pre >= last_linked + # `v` has already been processed, so perform path compression + + # For performance, if the number of ancestors is small avoid + # the extra allocation of the worklist. + if length(ancestors) <= 32 + snca_compress!(state, ancestors, v_pre, last_linked) + else + snca_compress_worklist!(state, ancestors, v_pre, last_linked) + end + end + + # The (preorder number of the) semidominator of a block is the + # minimum over the labels of its predecessors + semi_w = min(semi_w, state[v_pre].label) + end + state[w] = SNCAData(semi_w, semi_w) + end + + # Compute immediate dominators, which for a node must be the nearest common + # ancestor in the (immediate) dominator tree between its semidominator and + # its parent (see Lemma 2.6 in [LG05]). + idoms_pre = copy(D.to_parent_pre) + for v in 2:n_nodes + idom = idoms_pre[v] + vsemi = state[v].semi + while idom > vsemi + idom = idoms_pre[idom] + end + idoms_pre[v] = idom + end + + # Express idoms in BB indexing + resize!(domtree.idoms_bb, n_blocks) + for i::BBNumber in 1:n_blocks + if i == 1 || D.to_pre[i] == 0 + domtree.idoms_bb[i] = 0 + else + domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]] + end + end +end + +""" +Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the +modification suggested in the paper to use `last_linked` to determine whether +an ancestor has been processed rather than storing `0` in the ancestor array. +""" +function snca_compress!(state::Vector{SNCAData}, ancestors::Vector{PreNumber}, + v::PreNumber, last_linked::PreNumber) + u = ancestors[v] + @assert u < v + if u >= last_linked + snca_compress!(state, ancestors, u, last_linked) + if state[u].label < state[v].label + state[v] = SNCAData(state[v].semi, state[u].label) + end + ancestors[v] = ancestors[u] + end + nothing +end + +function snca_compress_worklist!( + state::Vector{SNCAData}, ancestors::Vector{PreNumber}, + v::PreNumber, last_linked::PreNumber) + # TODO: There is a smarter way to do this + u = ancestors[v] + worklist = Tuple{PreNumber, PreNumber}[(u,v)] + @assert u < v + while !isempty(worklist) + u, v = last(worklist) + if u >= last_linked + if ancestors[u] >= last_linked + push!(worklist, (ancestors[u], u)) + continue + end + if state[u].label < state[v].label + state[v] = SNCAData(state[v].semi, state[u].label) + end + ancestors[v] = ancestors[u] + end + pop!(worklist) + end +end + +"Given updated blocks, update the given dominator tree with an inserted edge." +function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, + from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + + # Implements Section 3.1 of [GI16] + dt = domtree.dfs_tree + from_pre = dt.to_pre[from] + to_pre = dt.to_pre[to] + from_post = dt.to_post[from] + to_post = dt.to_post[to] + if to_pre == 0 || (from_pre < to_pre && from_post < to_post) + # The DFS tree is invalidated by the edge insertion, so run from + # scratch + update_domtree!(blocks, domtree, true, 0) + else + # DFS tree is still valid, so update only affected nodes + update_domtree!(blocks, domtree, false, to_pre) + end + + return domtree +end + +"Given updated blocks, update the given dominator tree with a deleted edge." +function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, + from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + + # Implements Section 3.1 of [GI16] + if is_parent(domtree.dfs_tree, from, to) + # The `from` block is the parent of the `to` block in the DFS tree, so + # deleting the edge invalidates the DFS tree, so start from scratch + update_domtree!(blocks, domtree, true, 0) + elseif on_semidominator_path(domtree, from, to) + # Recompute semidominators for blocks with preorder number up to that + # of `to` block. Semidominators for blocks with preorder number greater + # than that of `to` aren't affected because no semidominator path to + # the block can pass through the `to` block (the preorder number of + # `to` would be lower than those of these blocks, and `to` is not their + # parent in the DFS tree). + to_pre = domtree.dfs_tree.to_pre[to] + update_domtree!(blocks, domtree, false, to_pre) + end + # Otherwise, dominator tree is not affected + + return domtree +end + +"Check if x is the parent of y in the given DFS tree." +function is_parent(dfs_tree::DFSTree, x::BBNumber, y::BBNumber) + x_pre = dfs_tree.to_pre[x] + y_pre = dfs_tree.to_pre[y] + return x_pre == dfs_tree.to_parent_pre[y_pre] +end + +""" +Check if x is on some semidominator path from the semidominator of y to y, +assuming there is an edge from x to y. +""" +function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber) + x_pre = domtree.dfs_tree.to_pre[x] + y_pre = domtree.dfs_tree.to_pre[y] + + semi_y = domtree.snca_state[y_pre].semi + current_block = x_pre + + # Follow the semidominators of `x` up the DFS tree to see if we ever reach + # the semidominator of `y`. If so, `x` is on a semidominator path between + # `y` and its semidominator. We can stop if the preorder number of the + # semidominators becomes less than that of the semidominator of `y`, + # because it can only decrease further. + while current_block >= semi_y + if semi_y == current_block + return true + end + current_block = domtree.snca_state[current_block].semi + end + return false +end + +""" +Rename basic block numbers in a dominator tree, removing the block if it is +renamed to -1. +""" +function rename_blocks!(domtree::DomTree, rename_bb::Vector{BBNumber}) + # Rename DFS tree + rename_blocks!(domtree.dfs_tree, rename_bb) + + # `snca_state` is indexed by preorder number, so should be unchanged + + # Rename `idoms_bb` and `nodes` + old_idoms_bb = copy(domtree.idoms_bb) + old_nodes = copy(domtree.nodes) + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + domtree.idoms_bb[new_bb] = (new_bb == 1) ? + 0 : rename_bb[old_idoms_bb[old_bb]] + domtree.nodes[new_bb] = old_nodes[old_bb] + map!(i -> rename_bb[i], + domtree.nodes[new_bb].children, + domtree.nodes[new_bb].children) + end + end + + # length of `to_pre` after renaming DFS tree is new number of basic blocks + resize!(domtree.idoms_bb, length(domtree.dfs_tree.to_pre)) + resize!(domtree.nodes, length(domtree.dfs_tree.to_pre)) + return domtree +end + """ - Checks if bb1 dominates bb2. - bb1 and bb2 are indexes into the CFG blocks. - bb1 dominates bb2 if the only way to enter bb2 is via bb1. - (Other blocks may be in between, e.g bb1->bbX->bb2). +Rename basic block numbers in a DFS tree, removing the block if it is renamed +to -1. """ -function dominates(domtree::DomTree, bb1::Int, bb2::Int) +function rename_blocks!(D::DFSTree, rename_bb::Vector{BBNumber}) + n_blocks = length(D.to_pre) + n_reachable_blocks = length(D.from_pre) + + old_to_pre = copy(D.to_pre) + old_from_pre = copy(D.from_pre) + old_to_post = copy(D.to_post) + old_from_post = copy(D.from_post) + max_new_bb = 0 + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + D.to_pre[new_bb] = old_to_pre[old_bb] + D.from_pre[old_to_pre[old_bb]] = new_bb + D.to_post[new_bb] = old_to_post[old_bb] + D.from_post[old_to_post[old_bb]] = new_bb + + # Keep track of highest BB number to resize arrays with + if new_bb > max_new_bb + max_new_bb = new_bb + end + end + end + resize!(D.to_pre, max_new_bb) + resize!(D.to_post, max_new_bb) + # `to_parent_pre` should be unchanged + return D +end + +""" +Combine two blocks `from` and `to`. It is assumed that there is an edge from +`from` to `to`, and that the sole successor of `from` is `to` and the sole +predecessor of `to` is `from`. This does not remove the `to` node, which is +then considered to have no edges. +""" +function combine_blocks!(domtree::DomTree, from::BBNumber, to::BBNumber) + orig_pre_of_to = domtree.dfs_tree.to_pre[to] + + # Combine nodes in DFS tree + combine_blocks!(domtree.dfs_tree, from, to) + + # Update `snca_state` + deleteat!(domtree.snca_state, orig_pre_of_to) + for pre in 1:length(domtree.snca_state) + # If semidominator was `to`, set it to `from` instead + if domtree.snca_state[pre].semi == orig_pre_of_to + label = domtree.snca_state[pre].label + domtree.snca_state[pre] = SNCAData(orig_pre_of_to - 1, label) + end + # Labels will be reset anyway, so no need to update them + end + + # Update `idoms_bb` + for bb in 1:length(domtree.idoms_bb) + # If immediate dominator was `to`, set it to `from` instead + if domtree.idoms_bb[bb] == to + domtree.idoms_bb[bb] = from + end + end + + # Update `nodes` + copy!(domtree.nodes[from].children, domtree.nodes[to].children) + domtree.nodes[to] = DomTreeNode() + # Recursively decrement level of the affected children + worklist = domtree.nodes[from].children + while !isempty(worklist) + node = pop!(worklist) + domtree.nodes[node] = DomTreeNode(domtree.nodes[node].level-1, + domtree.nodes[node].children) + foreach(child -> push!(worklist, child), + domtree.nodes[node].children) + end + + return domtree +end + +function combine_blocks!(D::DFSTree, from::BBNumber, to::BBNumber) + # Reassignment of preorder and postorder numbers depend on these + # conditions, which should hold if `from` has only one successor `to` and + # `to` has only one predecessor `from`. + @assert D.to_pre[from] + 1 == D.to_pre[to] + @assert D.to_post[from] == D.to_post[to] + 1 + + # Set preorder number of `to` to 0, + # decrement preorder numbers greater than original preorder number of `to`. + # Set postorder number of `to` to 0, + # decrement postorder numbers greater than original preorder number of `to`. + + orig_pre_of_to = D.to_pre[to] + orig_post_of_to = D.to_post[to] + + D.to_pre[to] = 0 + D.to_post[to] = 0 + for bb in 1:length(D.to_pre) + if D.to_pre[bb] > orig_pre_of_to + D.to_pre[bb] -= 1 + end + if D.to_post[bb] > orig_post_of_to + D.to_post[bb] -= 1 + end + end + deleteat!(D.from_pre, orig_pre_of_to) + deleteat!(D.from_post, orig_post_of_to) + + deleteat!(D.to_parent_pre, orig_pre_of_to) + for pre in 1:length(D.to_parent_pre) + if D.to_parent_pre[pre] > orig_pre_of_to + D.to_parent_pre[pre] -= 1 + end + end + + return D +end + +""" +Checks if bb1 dominates bb2. +bb1 and bb2 are indexes into the CFG blocks. +bb1 dominates bb2 if the only way to enter bb2 is via bb1. +(Other blocks may be in between, e.g bb1->bbX->bb2). +""" +function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) bb1 == bb2 && return true target_level = domtree.nodes[bb1].level source_level = domtree.nodes[bb2].level source_level < target_level && return false for _ in (source_level - 1):-1:target_level - bb2 = domtree.idoms[bb2] + bb2 = domtree.idoms_bb[bb2] end return bb1 == bb2 end -bb_unreachable(domtree::DomTree, bb::Int) = bb != 1 && domtree.nodes[bb].level == 1 - -function update_level!(domtree::Vector{DomTreeNode}, node::Int, level::Int) - worklist = Tuple{Int, Int}[(node, level)] - while !isempty(worklist) - (node, level) = pop!(worklist) - domtree[node] = DomTreeNode(level, domtree[node].children) - foreach(domtree[node].children) do child - push!(worklist, (child, level+1)) - end - end -end +bb_unreachable(domtree::DomTree, bb::BBNumber) = bb != 1 && domtree.dfs_tree.to_pre[bb] == 0 "Iterable data structure that walks though all dominated blocks" struct DominatedBlocks domtree::DomTree - worklist::Vector{Int} + worklist::Vector{BBNumber} end "Returns an iterator that walks through all blocks dominated by the basic block at index `root`" -function dominated(domtree::DomTree, root::Int) - doms = DominatedBlocks(domtree, Vector{Int}()) +function dominated(domtree::DomTree, root::BBNumber) + doms = DominatedBlocks(domtree, Vector{BBNumber}()) push!(doms.worklist, root) doms end @@ -71,8 +673,8 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing) return (bb, nothing) end -function naive_idoms(cfg::CFG) - nblocks = length(cfg.blocks) +function naive_idoms(blocks::Vector{BasicBlock}) + nblocks = length(blocks) # The extra +1 helps us detect unreachable blocks below dom_all = BitSet(1:nblocks+1) dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks] @@ -80,10 +682,10 @@ function naive_idoms(cfg::CFG) while changed changed = false for n = 2:nblocks - if isempty(cfg.blocks[n].preds) + if isempty(blocks[n].preds) continue end - firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, cfg.blocks[n].preds)) + firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds)) new_doms = copy(dominators[firstp]) for p in rest intersect!(new_doms, dominators[p]) @@ -115,213 +717,3 @@ function naive_idoms(cfg::CFG) end idoms end - -# Construct Dom Tree -function construct_domtree(cfg::CFG) - idoms = SNCA(cfg) - # Compute children - nblocks = length(cfg.blocks) - domtree = DomTreeNode[DomTreeNode() for _ = 1:nblocks] - for (idx, idom) in Iterators.enumerate(idoms) - (idx == 1 || idom == 0) && continue - push!(domtree[idom].children, idx) - end - # Recursively set level - update_level!(domtree, 1, 1) - DomTree(idoms, domtree) -end - -#================================ [SNCA] ======================================# -# -# This section implements the Semi-NCA (SNCA) dominator tree construction from -# described in Georgiadis' PhD thesis [LG05], which itself is a simplification -# of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches -# the algorithm choice in LLVM and seems to be a sweet spot in implementation -# simplicity and efficiency. -# -# [LG05] Linear-Time Algorithms for Dominators and Related Problems -# Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: -# ftp://ftp.cs.princeton.edu/reports/2005/737.pdf -# -# [LT79] A fast algorithm for finding dominators in a flowgraph -# Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1 -# http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf -# -begin - # We could make these real structs, but probably not worth the extra - # overhead. Still, give them names for documentary purposes. - const BBNumber = UInt - const DFSNumber = UInt - - """ - Keeps the per-BB state of the Semi NCA algorithm. In the original - formulation, there are three separate length `n` arrays, `label`, `semi` and - `ancestor`. Instead, for efficiency, we use one array in a array-of-structs - style setup. - """ - struct Node - semi::DFSNumber - label::DFSNumber - end - - struct DFSTree - # Maps DFS number to BB number - numbering::Vector{BBNumber} - # Maps BB number to DFS number - reverse::Vector{DFSNumber} - # Records parent relationships in the DFS tree (DFS number -> DFS number) - # Storing it this way saves a few lookups in the snca_compress! algorithm - parents::Vector{DFSNumber} - end - length(D::DFSTree) = length(D.numbering) - preorder(D::DFSTree) = OneTo(length(D)) - _drop(xs::AbstractUnitRange, n::Integer) = (first(xs)+n):last(xs) - - function DFSTree(nblocks::Int) - DFSTree( - Vector{BBNumber}(undef, nblocks), - zeros(DFSNumber, nblocks), - Vector{DFSNumber}(undef, nblocks)) - end - - function DFS(cfg::CFG, current_node::BBNumber)::DFSTree - dfs = DFSTree(length(cfg.blocks)) - # TODO: We could reuse the storage in DFSTree for our worklist. We're - # guaranteed for the worklist to be smaller than the remaining space in - # DFSTree - worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)] - dfs_num = 1 - parent = 0 - while !isempty(worklist) - (parent, current_node) = pop!(worklist) - dfs.reverse[current_node] != 0 && continue - dfs.reverse[current_node] = dfs_num - dfs.numbering[dfs_num] = current_node - dfs.parents[dfs_num] = parent - for succ in cfg.blocks[current_node].succs - push!(worklist, (dfs_num, succ)) - end - dfs_num += 1 - end - # If all blocks are reachable, this is a no-op, otherwise, - # we shrink these arrays. - resize!(dfs.numbering, dfs_num - 1) - resize!(dfs.parents, dfs_num - 1) - dfs - end - - """ - Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the - modification suggested in the paper to use `last_linked` to determine - whether an ancestor has been processed rather than storing `0` in the - ancestor array. - """ - function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber}, - v::DFSNumber, last_linked::DFSNumber) - u = ancestors[v] - @assert u < v - if u >= last_linked - snca_compress!(state, ancestors, u, last_linked) - if state[u].label < state[v].label - state[v] = Node(state[v].semi, state[u].label) - end - ancestors[v] = ancestors[u] - end - nothing - end - - function snca_compress_worklist!( - state::Vector{Node}, ancestors::Vector{DFSNumber}, - v::DFSNumber, last_linked::DFSNumber) - # TODO: There is a smarter way to do this - u = ancestors[v] - worklist = Tuple{DFSNumber, DFSNumber}[(u,v)] - @assert u < v - while !isempty(worklist) - u, v = last(worklist) - if u >= last_linked - if ancestors[u] >= last_linked - push!(worklist, (ancestors[u], u)) - continue - end - if state[u].label < state[v].label - state[v] = Node(state[v].semi, state[u].label) - end - ancestors[v] = ancestors[u] - end - pop!(worklist) - end - end - - """ - SNCA(cfg::CFG) - - Determines a map from basic blocks to the block which immediately dominate them. - Expressed as indexes into `cfg.blocks`. - - The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05]. - Note that the pseudocode in [LG05] is not entirely accurate. - The best way to understand what's happening is to read [LT79], then the - description of SLT in [LG05] (warning: inconsistent notation), then - the description of Semi-NCA. - """ - function SNCA(cfg::CFG) - D = DFS(cfg, BBNumber(1)) - # `label` is initialized to the identity mapping (though - # the paper doesn't make that clear). The rational for this is Lemma - # 2.4 in [LG05] (i.e. Theorem 4 in ). Note however, that we don't - # ever look at `semi` until it is fully initialized, so we could leave - # it uninitialized here if we wanted to. - state = Node[ Node(typemax(DFSNumber), w) for w in preorder(D) ] - # Initialize idoms to parents. Note that while idoms are eventually - # BB indexed, we keep it DFS indexed until a final post-processing - # pass to avoid extra memory references during the O(N^2) phase below. - idoms_dfs = copy(D.parents) - # We abuse the parents array as the ancestors array. - # Semi-NCA does not look at the parents array at all. - # SLT would, but never simultaneously, so we could still - # do this. - ancestors = D.parents - for w::DFSNumber ∈ reverse(_drop(preorder(D), 1)) - # LLVM initializes this to the parent, the paper initializes this to - # `w`, but it doesn't really matter (the parent is a predecessor, - # so at worst we'll discover it below). Save a memory reference here. - semi_w = typemax(DFSNumber) - for v ∈ cfg.blocks[D.numbering[w]].preds - # For the purpose of the domtree, ignore virtual predecessors - # into catch blocks. - v == 0 && continue - vdfs = D.reverse[v] - # Ignore unreachable predecessors - vdfs == 0 && continue - last_linked = DFSNumber(w + 1) - # N.B.: This conditional is missing from the psuedocode - # in figure 2.8 of [LG05]. It corresponds to the - # `ancestor[v] != 0` check in the `eval` implementation in - # figure 2.6 - if vdfs >= last_linked - # For performance, if the number of ancestors is small - # avoid the extra allocation of the worklist. - if length(ancestors) <= 32 - snca_compress!(state, ancestors, vdfs, last_linked) - else - snca_compress_worklist!(state, ancestors, vdfs, last_linked) - end - end - semi_w = min(semi_w, state[vdfs].label) - end - state[w] = Node(semi_w, semi_w) - end - for v ∈ _drop(preorder(D), 1) - idom = idoms_dfs[v] - vsemi = state[v].semi - while idom > vsemi - idom = idoms_dfs[idom] - end - idoms_dfs[v] = idom - end - # Reexpress the idom relationship in BB indexing - idoms_bb = Int[ (i == 1 || D.reverse[i] == 0) ? 0 : D.numbering[idoms_dfs[D.reverse[i]]] for i = 1:length(cfg.blocks) ] - idoms_bb - end -end diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index 0ff6943aa5408..1f6470fed9046 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -10,8 +10,9 @@ else end end -include("compiler/ssair/ir.jl") +include("compiler/ssair/basicblock.jl") include("compiler/ssair/domtree.jl") +include("compiler/ssair/ir.jl") include("compiler/ssair/slot2ssa.jl") include("compiler/ssair/queries.jl") include("compiler/ssair/passes.jl") @@ -111,9 +112,8 @@ end function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code - @timeit "domtree 1" domtree = construct_domtree(ir.cfg) defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst) - @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` + @timeit "construct_ssa" ir = construct_ssa!(ci, ir, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` return ir end @@ -122,14 +122,12 @@ function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState) ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, nargs, sv) ir = slot2reg(ir, ci, nargs, sv) #@Base.show ("after_construct", ir) - # TODO: Domsorting can produce an updated domtree - no need to recompute here @timeit "compact 1" ir = compact!(ir) @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv) #@timeit "verify 2" verify_ir(ir) ir = compact!(ir) #@Base.show ("before_sroa", ir) - @timeit "domtree 2" domtree = construct_domtree(ir.cfg) - @timeit "SROA" ir = getfield_elim_pass!(ir, domtree) + @timeit "SROA" ir = getfield_elim_pass!(ir) #@Base.show ir.new_nodes #@Base.show ("after_sroa", ir) ir = adce_pass!(ir) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 926491223795c..4b9b7f5fd41e4 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -298,6 +298,7 @@ function finish_cfg_inline!(state::CFGInliningState) end end +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -415,6 +416,7 @@ end const fatal_type_bound_error = ErrorException("fatal error in type inference (type bound)") +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -497,7 +499,9 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, end function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool) - # Compute the new CFG first (modulo statement ranges, which will be computed below) + # Compute the new CFG first (modulo statement ranges, which will be + # computed below, and the domtree, which will be updated below before we + # iterate through the statements) state = CFGInliningState(ir) for item in todo if isa(item, UnionSplit) @@ -518,6 +522,9 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo let compact = IncrementalCompact(ir, false) compact.result_bbs = state.new_cfg_blocks + # Recompute the domtree now that the CFG has been modified + compact.result_domtree = construct_domtree(compact.result_bbs) + # This needs to be a minimum and is more of a size hint nn = 0 for item in todo @@ -572,7 +579,6 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo compact[idx] = PhiNode(Any[edge == length(state.bb_rename) ? length(state.new_cfg_blocks) : state.bb_rename[edge+1]-1 for edge in stmt.edges], stmt.values) end end - ir = finish(compact) end return ir diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 621072527a334..4688a14a6fb8c 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -3,37 +3,35 @@ @inline isexpr(@nospecialize(stmt), head::Symbol) = isa(stmt, Expr) && stmt.head === head Core.PhiNode() = Core.PhiNode(Any[], Any[]) -""" -Like UnitRange{Int}, but can handle the `last` field, being temporarily -< first (this can happen during compacting) -""" -struct StmtRange <: AbstractUnitRange{Int} - start::Int - stop::Int -end -first(r::StmtRange) = r.start -last(r::StmtRange) = r.stop -iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1) - -StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range)) - -struct BasicBlock - stmts::StmtRange - preds::Vector{Int} - succs::Vector{Int} -end -function BasicBlock(stmts::StmtRange) - return BasicBlock(stmts, Int[], Int[]) -end -function BasicBlock(old_bb, stmts) - return BasicBlock(stmts, old_bb.preds, old_bb.succs) -end -copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs)) +isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) struct CFG blocks::Vector{BasicBlock} index::Vector{Int} # map from instruction => basic-block number # TODO: make this O(1) instead of O(log(n_blocks))? + domtree::DomTree +end + +function CFG(blocks::Vector{BasicBlock}, index::Vector{Int}) + return CFG(blocks, index, construct_domtree(blocks)) +end + +copy(c::CFG) = CFG(BasicBlock[copy(b) for b in c.blocks], + copy(c.index), + copy(c.domtree)) + +function cfg_insert_edge!(cfg::CFG, from::Int, to::Int) + # Assumes that this edge does not already exist + push!(cfg.blocks[to].preds, from) + push!(cfg.blocks[from].succs, to) + domtree_insert_edge!(cfg.domtree, cfg.blocks, from, to) + nothing +end + +function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) + kill_edge!(cfg.blocks, from, to) + domtree_delete_edge!(cfg.domtree, cfg.blocks, from, to) + nothing end function block_for_inst(index::Vector{Int}, inst::Int) @@ -283,11 +281,13 @@ function setindex!(x::IRCode, @nospecialize(repl), s::SSAValue) return x end - +# SSA values that need renaming struct OldSSAValue id::Int end +# SSA values that are in `new_new_nodes` of an `IncrementalCompact` and are to +# be actually inserted next time (they become `new_nodes` next time) struct NewSSAValue id::Int end @@ -500,9 +500,15 @@ mutable struct IncrementalCompact ir::IRCode result::InstructionStream result_bbs::Vector{BasicBlock} + result_domtree::DomTree + ssa_rename::Vector{Any} + # `bb_rename_pred` is for renaming predecessors to the blocks they have + # been merged into (differs from `bb_rename_succ` only if blocks are + # combined), `bb_rename_succ` is how blocks are actually renamed bb_rename_pred::Vector{Int} bb_rename_succ::Vector{Int} + used_ssas::Vector{Int} late_fixup::Vector{Int} perm::Vector{Int} @@ -512,6 +518,7 @@ mutable struct IncrementalCompact # TODO: Switch these two to a min-heap of some sort pending_nodes::NewNodeStream # New nodes that were after the compaction point at insertion time pending_perm::Vector{Int} + # State idx::Int result_idx::Int @@ -519,7 +526,8 @@ mutable struct IncrementalCompact renamed_new_nodes::Bool cfg_transforms_enabled::Bool fold_constant_branches::Bool - function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) + + function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=true) # Sort by position with attach after nodes after regular ones perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] (new_node.pos * 2 + Int(new_node.attach_after)) @@ -532,7 +540,7 @@ mutable struct IncrementalCompact bb_rename = Vector{Int}(undef, length(blocks)) cur_bb = 1 for i = 1:length(bb_rename) - if i != 1 && length(blocks[i].preds) == 0 + if bb_unreachable(code.cfg.domtree, i) bb_rename[i] = -1 else bb_rename[i] = cur_bb @@ -556,20 +564,24 @@ mutable struct IncrementalCompact end end let blocks = blocks - result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] + result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) + if bb_rename[i] != -1] end + result_domtree = rename_blocks!(copy(code.cfg.domtree), bb_rename) else bb_rename = Vector{Int}() result_bbs = code.cfg.blocks + result_domtree = code.cfg.domtree end ssa_rename = Any[SSAValue(i) for i = 1:new_len] late_fixup = Vector{Int}() new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms) + return new(code, result, result_bbs, result_domtree, ssa_rename, + bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, + new_new_nodes, pending_nodes, pending_perm, 1, 1, 1, false, + allow_cfg_transforms, allow_cfg_transforms) end # For inlining @@ -583,11 +595,11 @@ mutable struct IncrementalCompact new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, parent.result, - parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas, - late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, result_offset, parent.active_result_bb, false, false, false) + return new(code, parent.result, parent.result_bbs, + parent.result_domtree, ssa_rename, bb_rename, bb_rename, + parent.used_ssas, late_fixup, perm, 1, new_new_nodes, + pending_nodes, pending_perm, 1, result_offset, + parent.active_result_bb, false, false, false) end end @@ -820,7 +832,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} return values end -function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int}, do_rename_ssa::Bool) +function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, do_rename_ssa::Bool) id = val.id if id > length(ssanums) return val @@ -829,14 +841,14 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int val = ssanums[id] end if isa(val, SSAValue) - if used_ssa !== nothing - used_ssa[val.id] += 1 + if used_ssas !== nothing + used_ssas[val.id] += 1 end end return val end -function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) +function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) urs = userefs(stmt) for op in urs val = op[] @@ -844,7 +856,7 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vec push!(late_fixup, result_idx) end if isa(val, SSAValue) - val = renumber_ssa2(val, ssanums, used_ssa, do_rename_ssa) + val = renumber_ssa2(val, ssanums, used_ssas, do_rename_ssa) end if isa(val, OldSSAValue) || isa(val, NewSSAValue) push!(late_fixup, result_idx) @@ -859,59 +871,82 @@ function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int) preds, succs = bbs[to].preds, bbs[from].succs deleteat!(preds, findfirst(x->x === from, preds)::Int) deleteat!(succs, findfirst(x->x === to, succs)::Int) - if length(preds) == 0 - for succ in copy(bbs[to].succs) - kill_edge!(bbs, to, succ) - end - end + nothing end # N.B.: from and to are non-renamed indices function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int) - # Note: We recursively kill as many edges as are obviously dead. However, this - # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or - # worstcase during codegen). - preds, succs = compact.result_bbs[compact.bb_rename_succ[to]].preds, compact.result_bbs[compact.bb_rename_pred[from]].succs - deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int) - deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int) - # Check if the block is now dead - if length(preds) == 0 - for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred)) + renamed_from = compact.bb_rename_pred[from] + renamed_to = compact.bb_rename_succ[to] + + preds = compact.result_bbs[renamed_to].preds + succs = compact.result_bbs[renamed_from].succs + + deleteat!(preds, findfirst(x -> x === renamed_from, preds)::Int) + deleteat!(succs, findfirst(x -> x === renamed_to, succs)::Int) + + domtree_delete_edge!(compact.result_domtree, + compact.result_bbs, + renamed_from, + renamed_to) + + # Recursively kill edges to dead blocks. This is not necessary for removing + # dead blocks with a subsequent `IncrementalCompact`, but killing all the + # statements in dead blocks allows passes not to have to worry about + # whether statements have become invalid by becoming dead. + if bb_unreachable(compact.result_domtree, renamed_to) + for succ in copy(compact.result_bbs[renamed_to].succs) + # Make sure edge hasn't already been killed in a previous iteration + # of this loop + if succ in compact.result_bbs[renamed_to].succs + kill_edge!(compact, active_bb, to, + findfirst(x -> x === succ, compact.bb_rename_pred)) + end end - if to < active_bb - # Kill all statements in the block - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + + # Kill all statements in dead successor if it has already been + # processed. Also kill statements if dead successor is the active BB, + # in order to kill statements that have already been processed, + # although statements in the current BB that have not been processed + # will be killed in `process_node!`. Statements in dead blocks that + # have not yet been processed (`to` > `active_bb`) will be killed in + # `process_node!`. + if to <= active_bb + stmts = compact.result_bbs[renamed_to].stmts for stmt in stmts compact.result[stmt][:inst] = nothing end compact.result[last(stmts)][:inst] = ReturnNode() end - else - # We need to remove this edge from any phi nodes - if to < active_bb - idx = first(compact.result_bbs[compact.bb_rename_succ[to]].stmts) - while idx < length(compact.result) - stmt = compact.result[idx][:inst] - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x === compact.bb_rename_pred[from], stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end + end + + # Remove this edge from any phi nodes + if to < active_bb + stmts = compact.result_bbs[renamed_to].stmts + idx = first(stmts) + while idx <= last(stmts) + stmt = compact.result[idx][:inst] + if stmt === nothing idx += 1 + continue end - else - idx = first(compact.ir.cfg.blocks[to].stmts) - for stmt in CompactPeekIterator(compact, idx) - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x === from, stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end + isa(stmt, PhiNode) || break + i = findfirst(x -> x === renamed_from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) + end + idx += 1 + end + else + stmts = compact.ir.cfg.blocks[to].stmts + for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) + stmt === nothing && continue + isa(stmt, PhiNode) || break + i = findfirst(x -> x === from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) end end end @@ -925,7 +960,19 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr late_fixup = compact.late_fixup used_ssas = compact.used_ssas ssa_rename[idx] = SSAValue(result_idx) - if stmt === nothing + if compact.cfg_transforms_enabled && + # Don't kill statements in blocks renamed to -1 because + # `cfg_simplify!` renames blocks that have been merged into others + # to -1, but the statements in them are not dead. Unreachable + # blocks detected in the constructor of `IncrementalCompact` are + # removed then, so we never encounter statements from them here. + compact.bb_rename_succ[active_bb] != -1 && + bb_unreachable(compact.result_domtree, + compact.bb_rename_succ[active_bb]) + # Kill statements in dead blocks + result[result_idx][:inst] = isterminator(stmt) ? ReturnNode() : nothing + result_idx += 1 + elseif stmt === nothing ssa_rename[idx] = stmt elseif isa(stmt, OldSSAValue) ssa_rename[idx] = ssa_rename[stmt.id] @@ -984,15 +1031,56 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa) result_idx += 1 elseif isa(stmt, PhiNode) - values = process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa) - if length(stmt.edges) == 1 && isassigned(values, 1) && + if compact.cfg_transforms_enabled + # Rename phi node edges + map!(i -> compact.bb_rename_pred[i], stmt.edges, stmt.edges) + + # Remove edges and values associated with dead blocks. Entries in + # `values` can be undefined when the phi node refers to something + # that is not defined. (This is not a sign that the compiler is + # unintentionally leaving some entries in `values` uninitialized.) + # For example, consider a reference to a variable that is only + # defined if some branch is taken. + # + # In order to leave undefined values undefined (undefined-ness is + # not a value we can copy), we copy only the edges and (defined) + # values we want to keep to new arrays initialized with undefined + # elements. + edges = Vector{Any}(undef, length(stmt.edges)) + values = Vector{Any}(undef, length(stmt.values)) + new_index = 1 + for old_index in 1:length(stmt.edges) + if stmt.edges[old_index] != -1 + edges[new_index] = stmt.edges[old_index] + if isassigned(stmt.values, old_index) + values[new_index] = stmt.values[old_index] + end + new_index += 1 + end + end + resize!(edges, new_index-1) + resize!(values, new_index-1) + else + edges = stmt.edges + values = stmt.values + end + + values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa) + # Don't remove the phi node if it is before the definition of its value + # because doing so can create forward references. This should only + # happen with dead loops, but can cause problems when optimization + # passes look at all code, dead or not. This check should be + # unnecessary when DCE can remove those dead loops entirely, so this is + # just to be safe. + before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) && + idx < values[1].id + if length(edges) == 1 && isassigned(values, 1) && !before_def && length(compact.cfg_transforms_enabled ? compact.result_bbs[compact.bb_rename_succ[active_bb]].preds : compact.ir.cfg.blocks[active_bb].preds) == 1 # There's only one predecessor left - just replace it ssa_rename[idx] = values[1] else - edges = compact.cfg_transforms_enabled ? map!(i->compact.bb_rename_pred[i], stmt.edges, stmt.edges) : stmt.edges result[result_idx][:inst] = PhiNode(edges, values) result_idx += 1 end @@ -1085,10 +1173,19 @@ end struct CompactPeekIterator compact::IncrementalCompact start_idx::Int + end_idx::Int +end + +function CompactPeekIterator(compact::IncrementalCompact, start_idx::Int) + return CompactPeekIterator(compact, start_idx, 0) end entry_at_idx(entry::NewNodeInfo, idx::Int) = entry.attach_after ? entry.pos == idx - 1 : entry.pos == idx function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it.start_idx, it.compact.new_nodes_idx, 1)) + if it.end_idx > 0 && idx > it.end_idx + return nothing + end + # TODO: Take advantage of the fact that these arrays are sorted # TODO: this return value design is horrible compact = it.compact @@ -1125,8 +1222,8 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= resize!(compact, old_result_idx) end bb = compact.ir.cfg.blocks[active_bb] - if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && length(bb.preds) == 0 - # No predecessors, kill the entire block. + if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && compact.bb_rename_succ[active_bb] == -1 + # Dead block, so kill the entire block. compact.idx = last(bb.stmts) # Pop any remaining insertion nodes while compact.new_nodes_idx <= length(compact.perm) @@ -1146,7 +1243,7 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= # Move to next block compact.idx += 1 if finish_current_bb!(compact, active_bb, old_result_idx, true) - return iterate(compact, (compact.idx-1, active_bb + 1)) + return iterate(compact, (compact.idx, active_bb + 1)) else return Pair{Pair{Int, Int}, Any}(Pair{Int,Int}(compact.idx-1, old_result_idx), compact.result[old_result_idx][:inst]), (compact.idx, active_bb + 1) end @@ -1173,7 +1270,6 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= # result_idx is not, incremented, but that's ok and expected compact.result[old_result_idx] = compact.ir.stmts[idx] result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx, active_bb, true) - stmt_if_any = old_result_idx == result_idx ? nothing : compact.result[old_result_idx][:inst] compact.result_idx = result_idx if idx == last(bb.stmts) && !attach_after_stmt_after(compact, idx) finish_current_bb!(compact, active_bb, old_result_idx) @@ -1250,10 +1346,17 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt)) for ur in urs val = ur[] if isa(val, NewSSAValue) - ur[] = SSAValue(length(compact.result) + val.id) + val = SSAValue(length(compact.result) + val.id) elseif isa(val, OldSSAValue) - ur[] = compact.ssa_rename[val.id] + val = compact.ssa_rename[val.id] + end + if isa(val, SSAValue) && val.id <= length(compact.used_ssas) + # If `val.id` is greater than the length of `compact.result` or + # `compact.used_ssas`, this SSA value is in `new_new_nodes`, so + # don't count the use + compact.used_ssas[val.id] += 1 end + ur[] = val end return urs[] end @@ -1307,11 +1410,13 @@ end function complete(compact::IncrementalCompact) result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1) - cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) + cfg = CFG(result_bbs, + Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)], + compact.result_domtree) return IRCode(compact.ir, compact.result, cfg, compact.new_new_nodes) end -function compact!(code::IRCode, allow_cfg_transforms::Bool=false) +function compact!(code::IRCode, allow_cfg_transforms::Bool=true) compact = IncrementalCompact(code, allow_cfg_transforms) # Just run through the iterator without any processing for _ in compact; end # _ isa Pair{Int, Any} diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index a2dc6d6e75f60..a780e5f8f9271 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -55,7 +55,7 @@ function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int) # TODO: This can be much faster by looking at current level and only # searching for those blocks in a sorted order while !(curblock in allblocks) - curblock = domtree.idoms[curblock] + curblock = domtree.idoms_bb[curblock] end return curblock end @@ -69,8 +69,8 @@ function val_for_def_expr(ir::IRCode, def::Int, fidx::Int) end end -function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) - curblock = find_curblock(domtree, allblocks, curblock) +function compute_value_for_block(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) + curblock = find_curblock(ir.cfg.domtree, allblocks, curblock) def = 0 for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock @@ -80,10 +80,10 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector def == 0 ? phinodes[curblock] : val_for_def_expr(ir, def, fidx) end -function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int) +function compute_value_for_use(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int) # Find the first dominating def curblock = stmtblock = block_for_inst(ir.cfg, use_idx) - curblock = find_curblock(domtree, allblocks, curblock) + curblock = find_curblock(ir.cfg.domtree, allblocks, curblock) defblockdefs = Int[stmt for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock] def = 0 if !isempty(defblockdefs) @@ -105,7 +105,7 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I if !haskey(phinodes, curblock) # If this happens, we need to search the predecessors for defs. Which # one doesn't matter - if it did, we'd have had a phinode - return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) + return compute_value_for_block(ir, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) end # The use is the phinode return phinodes[curblock] @@ -524,7 +524,7 @@ function perform_lifting!(compact::IncrementalCompact, end assertion_counter = 0 -function getfield_elim_pass!(ir::IRCode, domtree::DomTree) +function getfield_elim_pass!(ir::IRCode) compact = IncrementalCompact(ir) insertions = Vector{Any}() defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}() @@ -787,7 +787,7 @@ function getfield_elim_pass!(ir::IRCode, domtree::DomTree) ldu = compute_live_ins(ir.cfg, du) phiblocks = Int[] if !isempty(ldu.live_in_bbs) - phiblocks = idf(ir.cfg, ldu, domtree) + phiblocks = idf(ir.cfg, ldu) end phinodes = IdDict{Int, SSAValue}() for b in phiblocks @@ -797,19 +797,19 @@ function getfield_elim_pass!(ir::IRCode, domtree::DomTree) # Now go through all uses and rewrite them allblocks = sort(vcat(phiblocks, ldu.def_bbs)) for stmt in du.uses - ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt) + ir[SSAValue(stmt)] = compute_value_for_use(ir, allblocks, du, phinodes, fidx, stmt) end if !isbitstype(fieldtype(typ, fidx)) for (use, list) in preserve_uses - push!(list, compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use)) + push!(list, compute_value_for_use(ir, allblocks, du, phinodes, fidx, use)) end end for b in phiblocks for p in ir.cfg.blocks[b].preds n = ir[phinodes[b]] push!(n.edges, p) - push!(n.values, compute_value_for_block(ir, domtree, - allblocks, du, phinodes, fidx, p)) + push!(n.values, compute_value_for_block( + ir, allblocks, du, phinodes, fidx, p)) end end end @@ -1032,32 +1032,57 @@ end function cfg_simplify!(ir::IRCode) bbs = ir.cfg.blocks + old_domtree = ir.cfg.domtree + new_domtree = copy(old_domtree) + merge_into = zeros(Int, length(bbs)) merged_succ = zeros(Int, length(bbs)) + function follow_merge_into(idx::Int) + while merge_into[idx] != 0 + idx = merge_into[idx] + end + return idx + end + function follow_merged_succ(idx::Int) + while merged_succ[idx] != 0 + idx = merged_succ[idx] + end + return idx + end - # Walk the CFG at from the entry block and aggressively combine blocks + # Walk the CFG from the entry block and aggressively combine blocks for (idx, bb) in enumerate(bbs) if length(bb.succs) == 1 succ = bb.succs[1] if length(bbs[succ].preds) == 1 - merge_into[succ] = idx - merged_succ[idx] = succ + # Prevent cycles by making sure we don't end up back at `idx` + # by following what is to be merged into `succ` + if follow_merged_succ(succ) != idx + # Update domtree, by combining the block that `idx` is + # going to be merged into with the block that `succ` was + # going to be merged into (before this merge) + combine_blocks!(new_domtree, + follow_merge_into(idx), + follow_merge_into(succ)) + + merge_into[succ] = idx + merged_succ[idx] = succ + end end end end + + # Assign new BB numbers max_bb_num = 1 bb_rename_succ = zeros(Int, length(bbs)) - # Lay out the basic blocks for i = 1:length(bbs) - if merge_into[i] != 0 - bb_rename_succ[i] = -1 - continue - end - # Drop unreachable blocks - if i != 1 && length(ir.cfg.blocks[i].preds) == 0 + # Drop blocks that will be merged away or are unreachable + if merge_into[i] != 0 || bb_unreachable(old_domtree, i) bb_rename_succ[i] = -1 end + bb_rename_succ[i] != 0 && continue + curr = i while true bb_rename_succ[curr] = max_bb_num @@ -1065,9 +1090,7 @@ function cfg_simplify!(ir::IRCode) # Now walk the chain of blocks we merged. # If we end in something that may fall through, # we have to schedule that block next - while merged_succ[curr] != 0 - curr = merged_succ[curr] - end + curr = follow_merged_succ(curr) terminator = ir.stmts[ir.cfg.blocks[curr].stmts[end]][:inst] if isa(terminator, GotoNode) || isa(terminator, ReturnNode) break @@ -1075,19 +1098,24 @@ function cfg_simplify!(ir::IRCode) curr += 1 end end + + # Figure out how predecessors should be renamed bb_rename_pred = zeros(Int, length(bbs)) for i = 1:length(bbs) if merged_succ[i] != 0 + # Block `i` should no longer be a predecessor (before renaming) + # because it is being merged with its sole successor bb_rename_pred[i] = -1 continue end - bbnum = i - while merge_into[bbnum] != 0 - bbnum = merge_into[bbnum] - end + bbnum = follow_merge_into(i) bb_rename_pred[i] = bb_rename_succ[bbnum] end + + # Compute map from new to old blocks result_bbs = Int[findfirst(j->i==j, bb_rename_succ) for i = 1:max_bb_num-1] + + # Compute new block lengths result_bbs_lengths = zeros(Int, max_bb_num-1) for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb @@ -1096,41 +1124,32 @@ function cfg_simplify!(ir::IRCode) ms = merged_succ[ms] end end + + # Compute statement indices the new blocks start at bb_starts = Vector{Int}(undef, 1+length(result_bbs_lengths)) bb_starts[1] = 1 for i = 1:length(result_bbs_lengths) bb_starts[i+1] = bb_starts[i] + result_bbs_lengths[i] end - # Figure out the pred and succ lists for each basic block in our merged result - cresult_bbs = let result_bbs = result_bbs, - merged_succ = merged_succ, - merge_into = merge_into, - bbs = bbs, - bb_rename_succ = bb_rename_succ - function compute_succs(i) - # Look at the original successor - orig_bb = result_bbs[i] - while merged_succ[orig_bb] != 0 - orig_bb = merged_succ[orig_bb] - end - newsuccs = map(i->bb_rename_succ[i], bbs[orig_bb].succs) - return newsuccs - end - function compute_preds(i) - orig_bb = result_bbs[i] - preds = bbs[orig_bb].preds - newpreds = map(preds) do pred - while merge_into[pred] != 0 - pred = merge_into[pred] - end - bb_rename_succ[pred] - end - return newpreds - end - BasicBlock[BasicBlock( - StmtRange(bb_starts[i], i + 1 > length(bb_starts) ? length(compact.result) : bb_starts[i + 1] - 1), - compute_preds(i), compute_succs(i)) for i = 1:length(result_bbs)] - end + + # Compute (renamed) successors and predecessors given (renamed) block + function compute_succs(i) + orig_bb = follow_merged_succ(result_bbs[i]) + return map(i -> bb_rename_succ[i], bbs[orig_bb].succs) + end + function compute_preds(i) + orig_bb = result_bbs[i] + preds = bbs[orig_bb].preds + return map(pred -> bb_rename_pred[pred], preds) + end + + cresult_bbs = BasicBlock[ + BasicBlock(StmtRange(bb_starts[i], + i+1 > length(bb_starts) ? + length(compact.result) : bb_starts[i+1]-1), + compute_preds(i), + compute_succs(i)) + for i = 1:length(result_bbs)] compact = IncrementalCompact(ir, true) # Run instruction compaction to produce the result, # but we're messing with the CFG @@ -1139,6 +1158,8 @@ function cfg_simplify!(ir::IRCode) compact.bb_rename_succ = bb_rename_succ compact.bb_rename_pred = bb_rename_pred compact.result_bbs = cresult_bbs + compact.result_domtree = rename_blocks!(new_domtree, bb_rename_succ) + result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb while ms != 0 @@ -1158,7 +1179,6 @@ function cfg_simplify!(ir::IRCode) ms = merged_succ[ms] end end - compact.active_result_bb = length(bb_starts) return finish(compact) end diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 5bea1c871610e..9276238e640e5 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -271,10 +271,10 @@ end # subset of those backedges from a subtree rooted at `A` (out outside the subtree rooted # at `A`). Note however that this does not work the other way. Thus, the algorithm # needs to make sure that we always visit `B` before `A`. -function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) +function idf(cfg::CFG, liveness::BlockLiveness) # This should be a priority queue, but TODO - sorted array for now defs = liveness.def_bbs - pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)] + pq = Tuple{Int, Int}[(defs[i], cfg.domtree.nodes[defs[i]].level) for i in 1:length(defs)] sort!(pq, by=x->x[2]) phiblocks = Int[] # This bitset makes sure we only add a phi node to a given block once. @@ -298,7 +298,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) # since at this point we know that there is an edge from `node`'s # subtree to `succ`, we know that if succ's level is greater than # that of `node`, it must be dominated by `node`. - succ_level = domtree.nodes[succ].level + succ_level = cfg.domtree.nodes[succ].level succ_level > level && continue # We don't dominate succ. We need to place a phinode, # unless liveness said otherwise. @@ -319,7 +319,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) end end # Recurse down the current subtree - for child in domtree.nodes[active].children + for child in cfg.domtree.nodes[active].children child in visited && continue push!(visited, child) push!(worklist, child) @@ -374,7 +374,7 @@ end RPO traversal and in particular, any use of an SSA value must come after (by linear order) its definition. """ -function domsort_ssa!(ir::IRCode, domtree::DomTree) +function domsort_ssa!(ir::IRCode) # First compute the new order of basic blocks result_order = Int[] stack = Int[] @@ -383,9 +383,8 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) nnewfallthroughs = 0 while node !== -1 push!(result_order, node) - cs = domtree.nodes[node].children + cs = ir.cfg.domtree.nodes[node].children terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst] - iscondbr = isa(terminator, GotoIfNot) let old_node = node + 1 if length(cs) >= 1 # Adding the nodes in reverse sorted order attempts to retain @@ -499,7 +498,17 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) for i in 1:length(result) result[i][:inst] = renumber_ssa!(result[i][:inst], inst_rename, true) end - cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]]) + # Recompute domtree from scratch because adding the new nodes for retaining + # fallthrough can be a significant change to the CFG, and recomputing the + # whole thing is probably faster than making many updates. If not for these + # new nodes, we could use + # + # domtree = rename_blocks!( + # ir.cfg.domtree, + # Int[haskey(bb_rename, old_bb) ? bb_rename[old_bb] : -1 + # for old_bb in 1:length(ir.cfg.blocks)]) + domtree = construct_domtree(new_bbs) + cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]], domtree) new_new_nodes = NewNodeStream(length(ir.new_nodes)) for i = 1:length(ir.new_nodes) new_info = ir.new_nodes.info[i] @@ -579,8 +588,8 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode return new_typ end -function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int, sptypes::Vector{Any}, - slottypes::Vector{Any}) +function construct_ssa!(ci::CodeInfo, ir::IRCode, defuse, nargs::Int, + sptypes::Vector{Any}, slottypes::Vector{Any}) code = ir.stmts.inst cfg = ir.cfg left = Int[] @@ -597,7 +606,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg for (enter_block, exc) in catch_entry_blocks exc_handlers[enter_block+1] = (enter_block, exc) # TODO: Cut off here if the terminator is a leave corresponding to this enter - for block in dominated(domtree, enter_block+1) + for block in dominated(cfg.domtree, enter_block+1) exc_handlers[block] = (enter_block, exc) end end @@ -649,7 +658,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg end end end - phiblocks = idf(cfg, live, domtree) + phiblocks = idf(cfg, live) for block in phiblocks push!(phi_slots[block], idx) node = PhiNode() @@ -893,6 +902,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg local node = new_nodes.stmts[i] node[:inst] = new_to_regular(renumber_ssa!(node[:inst], ssavalmap), nstmts) end - @timeit "domsort" ir = domsort_ssa!(ir, domtree) + @timeit "domsort" ir = domsort_ssa!(ir) return ir end diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index 67c03184d4ad5..6f7069f406747 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -11,7 +11,7 @@ if !isdefined(@__MODULE__, Symbol("@verify_error")) end end -function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool) +function check_op(ir::IRCode, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool) if isa(op, SSAValue) if op.id > length(ir.stmts) def_bb = block_for_inst(ir.cfg, ir.new_nodes[op.id - length(ir.stmts)].pos) @@ -28,7 +28,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, end end else - if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb)) + if !dominates(ir.cfg.domtree, def_bb, use_bb) && !(bb_unreachable(ir.cfg.domtree, def_bb) && bb_unreachable(ir.cfg.domtree, use_bb)) # At the moment, we allow GC preserve tokens outside the standard domination notion #@Base.show ir @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))" @@ -61,12 +61,23 @@ function count_int(val::Int, arr::Vector{Int}) end function verify_ir(ir::IRCode, print::Bool=true) + # Verify domtree + if ir.cfg.domtree.idoms_bb != construct_domtree(ir.cfg.blocks).idoms_bb + # Dynamically updating the domtree made it incorrect? + @verify_error "Dominator tree does not match recomputed one" + error() + end + # Verify domtree by comparing with naive algorithm, but only if there + # aren't too many blocks for it to handle + if length(ir.cfg.blocks) < 1000 && ir.cfg.domtree.idoms_bb != naive_idoms(ir.cfg.blocks) + @verify_error "Dominator tree does not match immediate dominators computed by naive algorithm" + error() + end # For now require compact IR # @assert isempty(ir.new_nodes) # Verify CFG last_end = 0 # Verify statements - domtree = construct_domtree(ir.cfg) for (idx, block) in pairs(ir.cfg.blocks) if first(block.stmts) != last_end + 1 #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] @@ -76,7 +87,7 @@ function verify_ir(ir::IRCode, print::Bool=true) last_end = last(block.stmts) terminator = ir.stmts[last_end][:inst] - bb_unreachable(domtree, idx) && continue + bb_unreachable(ir.cfg.domtree, idx) && continue for p in block.preds p == 0 && continue c = count_int(idx, ir.cfg.blocks[p].succs) @@ -144,7 +155,7 @@ function verify_ir(ir::IRCode, print::Bool=true) for (bb, idx) in bbidxiter(ir) # We allow invalid IR in dead code to avoid passes having to detect when # they're generating dead code. - bb_unreachable(domtree, bb) && continue + bb_unreachable(ir.cfg.domtree, bb) && continue stmt = ir.stmts[idx][:inst] stmt === nothing && continue if isa(stmt, PhiNode) @@ -174,7 +185,7 @@ function verify_ir(ir::IRCode, print::Bool=true) @verify_error "GlobalRefs and Exprs are not allowed as PhiNode values" error() end - check_op(ir, domtree, val, edge, last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print) + check_op(ir, val, edge, last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print) end elseif isa(stmt, PhiCNode) for i = 1:length(stmt.values) @@ -211,7 +222,7 @@ function verify_ir(ir::IRCode, print::Bool=true) end for op in userefs(stmt) op = op[] - check_op(ir, domtree, op, bb, idx, print) + check_op(ir, op, bb, idx, print) end end end diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 85ac8fd6a0f4c..334bafc5c8685 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -161,7 +161,7 @@ function f_ifelse(x) return b ? x + 1 : x end # 2 for now because the compiler leaves a GotoNode around -@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 +@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 # Test that inlining of _apply properly hits the inference cache @noinline cprop_inline_foo1() = (1, 1) diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index be7539cac4d9f..afed12c2d6fc1 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -34,8 +34,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg) - ir = Core.Compiler.domsort_ssa!(ir, domtree) + ir = Core.Compiler.domsort_ssa!(ir) Core.Compiler.verify_ir(ir) phi = ir.stmts.inst[3] @test isa(phi, Core.PhiNode) && length(phi.edges) == 1 @@ -62,8 +61,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg) - ir = Core.Compiler.domsort_ssa!(ir, domtree) + ir = Core.Compiler.domsort_ssa!(ir) Core.Compiler.verify_ir(ir) end @@ -180,8 +178,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any]) @test Core.Compiler.verify_ir(ir) === nothing - domtree = Core.Compiler.construct_domtree(ir.cfg) - ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir, domtree) + ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir) @test Core.Compiler.verify_ir(ir) === nothing end @@ -257,6 +254,30 @@ let m = Meta.@lower 1 + 1 @test isa(ret_2, Core.Compiler.ReturnNode) && ret_2.val == 2 end +let m = Meta.@lower 1 + 1 + # Test that CFG simplify doesn't try to merge every block in a loop into + # its predecessor + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::Core.CodeInfo + src.code = Any[ + # Block 1 + Core.Compiler.GotoNode(2), + # Block 2 + Core.Compiler.GotoNode(3), + # Block 3 + Core.Compiler.GotoNode(1) + ] + nstmts = length(src.code) + src.ssavaluetypes = nstmts + src.codelocs = fill(Int32(1), nstmts) + src.ssaflags = fill(Int32(0), nstmts) + ir = Core.Compiler.inflate_ir(src) + Core.Compiler.verify_ir(ir) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 1 +end + # Issue #29213 function f_29213() while true diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 3f7b9b02a728f..e04f4c89ca727 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -1,7 +1,21 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +using Base.Meta using Core.IR const Compiler = Core.Compiler +using .Compiler: CFG, BasicBlock + +make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) + +function make_ci(code) + ci = (Meta.@lower 1 + 1).args[1] + ci.code = code + nstmts = length(ci.code) + ci.ssavaluetypes = nstmts + ci.codelocs = fill(Int32(1), nstmts) + ci.ssaflags = fill(Int32(0), nstmts) + return ci +end # TODO: this test is broken #let code = Any[ @@ -28,7 +42,6 @@ const Compiler = Core.Compiler #end # Issue #31121 -using .Compiler: CFG, BasicBlock # We have the following CFG and corresponding DFS numbering: # @@ -47,7 +60,6 @@ using .Compiler: CFG, BasicBlock # than `3`, so the idom search missed that `1` is `3`'s semi-dominator). Here # we manually construct that CFG and verify that the DFS records the correct # parent. -make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) let cfg = CFG(BasicBlock[ make_bb([] , [2, 3]), make_bb([1] , [4, 5]), @@ -55,10 +67,10 @@ let cfg = CFG(BasicBlock[ make_bb([2, 3] , [5] ), make_bb([2, 4] , [] ), ], Int[]) - dfs = Compiler.DFS(cfg, Compiler.BBNumber(1)) - @test dfs.numbering[dfs.parents[dfs.reverse[5]]] == 4 - let correct_idoms = Compiler.naive_idoms(cfg) - @test Compiler.SNCA(cfg) == correct_idoms + dfs = Compiler.DFS(cfg.blocks) + @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 + let correct_idoms = Compiler.naive_idoms(cfg.blocks) + @test cfg.domtree.idoms_bb == correct_idoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). @@ -69,7 +81,7 @@ let cfg = CFG(BasicBlock[ c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs)) d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs)) cfg′ = CFG(blocks, cfg.index) - @test Compiler.SNCA(cfg′) == correct_idoms + @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms end end end @@ -135,9 +147,7 @@ end @test f32579(0, false) === false # Test for bug caused by renaming blocks improperly, related to PR #32145 -using Base.Meta -let ci = (Meta.@lower 1 + 1).args[1] - ci.code = [ +let ci = make_ci([ # block 1 Core.Compiler.GotoIfNot(Expr(:boundscheck), 6), # block 2 @@ -155,18 +165,13 @@ let ci = (Meta.@lower 1 + 1).args[1] Core.Compiler.ReturnNode(Core.SSAValue(8)), # block 6 Core.Compiler.ReturnNode(Core.SSAValue(8)) - ] - nstmts = length(ci.code) - ci.ssavaluetypes = nstmts - ci.codelocs = fill(Int32(1), nstmts) - ci.ssaflags = fill(Int32(0), nstmts) + ]) ir = Core.Compiler.inflate_ir(ci) ir = Core.Compiler.compact!(ir, true) @test Core.Compiler.verify_ir(ir) == nothing end # Test that GlobalRef in value position is non-canonical -using Base.Meta let ci = (Meta.@lower 1 + 1).args[1] ci.code = [ Expr(:call, GlobalRef(Main, :something_not_defined_please)) @@ -180,3 +185,174 @@ let ci = (Meta.@lower 1 + 1).args[1] ir = Core.Compiler.compact!(ir, true) @test_throws ErrorException Core.Compiler.verify_ir(ir, false) end + +# Issue #29107 +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoNode(6), + # Block 2 + # The following phi node gets deleted because it only has one edge, so + # the call to `something` is made to use the value of `something2()`, + # even though this value is defined after it. We don't want this to + # happen even though this block is dead because subsequent optimization + # passes may look at all code, dead or not. + Core.PhiNode(Any[2], Any[Core.SSAValue(4)]), + Expr(:call, :something, Core.SSAValue(2)), + Expr(:call, :something2), + Core.Compiler.GotoNode(2), + # Block 3 + Core.Compiler.ReturnNode(1000) + ]) + ir = Core.Compiler.inflate_ir(ci) + ir = Core.Compiler.compact!(ir, true) + # Make sure that if there is a call to `something` (block 2 should be + # removed entirely with working DCE), it doesn't use any SSA values that + # come after it. + for i in 1:length(ir.stmts) + s = ir.stmts[i] + if isa(s, Expr) && s.head == :call && s.args[1] == :something + if isa(s.args[2], SSAValue) + @test s.args[2].id <= i + end + end + end +end + +# Make sure dead blocks that are removed are not still referenced in live phi +# nodes +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 4), + # Block 2 + Core.Compiler.GotoNode(4), + # Block 3 (no predecessors) + Core.Compiler.GotoNode(4), + # Block 4 + Core.PhiNode(Any[1, 2, 3], Any[100, 200, 300]), + Core.Compiler.ReturnNode(Core.SSAValue(4)) + ]) + ir = Core.Compiler.inflate_ir(ci) + ir = Core.Compiler.compact!(ir, true) + @test Core.Compiler.verify_ir(ir) == nothing +end + +# Test dynamic update of domtree with edge insertions and deletions in the +# following CFG: +# +# 1,1 +# | \ +# | \ +# | 3,4 < +# | | \ +# 2,2 4,5 | +# | | / +# | 6,6 / +# | / +# | / +# 5,3 +# +# Nodes indicate BB number, preorder number +# Edges point down, except the arrow that points up +let cfg = CFG(BasicBlock[ + make_bb([], [3, 2]), # the order of the successors is deliberate + make_bb([1], [5]), # and is to determine the preorder numbers + make_bb([1, 6], [4]), + make_bb([3], [6]), + make_bb([2, 6], []), + make_bb([4], [5, 3]), + ], Int[]) + @test cfg.domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Test removal of edge between a parent and child in the DFS tree, which + # should trigger complete recomputation of domtree (first case in algorithm + # for removing edge from domtree dynamically) + Compiler.cfg_delete_edge!(cfg, 2, 5) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4] + # Add edge back (testing first case for insertion) + Compiler.cfg_insert_edge!(cfg, 2, 5) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Test second case in algorithm for removing edges from domtree, in which + # `from` is on a semidominator path from the semidominator of `to` to `to` + Compiler.cfg_delete_edge!(cfg, 6, 5) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4] + # Add edge back (testing second case for insertion) + Compiler.cfg_insert_edge!(cfg, 6, 5) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Test last case for removing edges, in which edge does not satisfy either + # of the above conditions + Compiler.cfg_delete_edge!(cfg, 6, 3) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + # Add edge back (testing second case for insertion) + Compiler.cfg_insert_edge!(cfg, 6, 3) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + + # Try removing all edges from root + Compiler.cfg_delete_edge!(cfg, 1, 2) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4] + Compiler.cfg_delete_edge!(cfg, 1, 3) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0] + # Add edges back + Compiler.cfg_insert_edge!(cfg, 1, 2) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0] + Compiler.cfg_insert_edge!(cfg, 1, 3) + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] +end + +# Make sure killing edges while iterating through an `IncrementalCompact` kills +# all the statements in the dead blocks too. +# +# Block that becomes unreachable is before the active BB +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoNode(4), + # Block 2 + Core.Compiler.PhiNode(Any[2, 4], Any[100, 200]), + Core.Compiler.GotoNode(2), + # Block 3 + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 2), + # Block 4 + Core.Compiler.ReturnNode(0) + ]) + ir = Core.Compiler.inflate_ir(ci) + compact = Core.Compiler.IncrementalCompact(ir, true) + Core.Compiler.foreach(x -> begin + # Delete edge to block 2 when at statement 4 + if x.first == 4 + Core.Compiler.kill_edge!( + compact, compact.active_result_bb, 3, 2) + compact.result[4] = Core.Compiler.GotoNode(4) + end + end, compact) + ir = Core.Compiler.finish(compact) + @test Core.Compiler.verify_ir(ir) === nothing + @test ir.stmts[2][:inst] === nothing +end +# Block that becomes unreachable is after the active BB +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 3), + # Block 2 + Core.Compiler.GotoNode(5), + # Block 3 + Core.Compiler.PhiNode(Any[2, 3], Any[100, 200]), + Core.Compiler.GotoNode(3), + # Block 4 + Core.Compiler.ReturnNode(0) + ]) + ir = Core.Compiler.inflate_ir(ci) + compact = Core.Compiler.IncrementalCompact(ir, true) + Core.Compiler.foreach(x -> begin + # Delete edge to block 3 when at statement 1 + if x.first == 1 + Core.Compiler.kill_edge!( + compact, compact.active_result_bb, 1, 3) + compact.result[1] = Core.Compiler.GotoNode(2) + end + end, compact) + ir = Core.Compiler.finish(compact) + @test Core.Compiler.verify_ir(ir) === nothing + @test ir.stmts[3][:inst] === nothing +end