diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index 1ab2876b769da..674e5de85ce3b 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -173,6 +173,8 @@ struct SNCAData label::PreNumber end +copy(d::SNCAData) = SNCAData(d.semi, d.label) + "Represents a Basic Block, in the DomTree" struct DomTreeNode # How deep we are in the DomTree @@ -183,6 +185,8 @@ end DomTreeNode() = DomTreeNode(1, Vector{BBNumber}()) +copy(n::DomTreeNode) = DomTreeNode(n.level, copy(n.children)) + "Data structure that encodes which basic block dominates which." struct DomTree # These can be reused when updating domtree dynamically @@ -196,9 +200,12 @@ struct DomTree nodes::Vector{DomTreeNode} end -function DomTree() - return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) -end +DomTree() = DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) + +copy(d::DomTree) = DomTree(copy(d.dfs_tree), + copy(d.snca_state), + copy(d.idoms_bb), + copy(d.nodes)) function construct_domtree(blocks::Vector{BasicBlock}) return update_domtree!(blocks, DomTree(), true, 0) @@ -207,7 +214,7 @@ end function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, recompute_dfs::Bool, max_pre::PreNumber) if recompute_dfs - DFS!(domtree.dfs_tree, blocks) + DFS!(domtree.dfs_tree, cfg.blocks) end if max_pre == 0 @@ -482,9 +489,9 @@ end Rename basic block numbers in a dominator tree, removing the block if it is renamed to -1. """ -function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber}) +function rename_blocks!(domtree::DomTree, rename_bb::Vector{BBNumber}) # Rename DFS tree - rename_nodes!(domtree.dfs_tree, rename_bb) + rename_blocks!(domtree.dfs_tree, rename_bb) # `snca_state` is indexed by preorder number, so should be unchanged @@ -512,7 +519,7 @@ end Rename basic block numbers in a DFS tree, removing the block if it is renamed to -1. """ -function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) +function rename_blocks!(D::DFSTree, rename_bb::Vector{BBNumber}) n_blocks = length(D.to_pre) n_reachable_blocks = length(D.from_pre) @@ -540,6 +547,91 @@ function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) return D end +""" +Combine two blocks `from` and `to`. It is assumed that there is an edge from +`from` to `to`, and that the sole successor of `from` is `to` and the sole +predecessor of `to` is `from`. This does not remove the `to` node, which is +then considered to have no edges. +""" +function combine_blocks!(domtree::DomTree, from::BBNumber, to::BBNumber) + orig_pre_of_to = domtree.dfs_tree.to_pre[to] + + # Combine nodes in DFS tree + combine_blocks!(domtree.dfs_tree, from, to) + + # Update `snca_state` + deleteat!(domtree.snca_state, orig_pre_of_to) + for pre in 1:length(domtree.snca_state) + # If semidominator was `to`, set it to `from` instead + if domtree.snca_state[pre].semi == orig_pre_of_to + label = domtree.snca_state[pre].label + domtree.snca_state[pre] = SNCAData(orig_pre_of_to - 1, label) + end + # Labels will be reset anyway, so no need to update them + end + + # Update `idoms_bb` + for bb in 1:length(domtree.idoms_bb) + # If immediate dominator was `to`, set it to `from` instead + if domtree.idoms_bb[bb] == to + domtree.idoms_bb[bb] = from + end + end + + # Update `nodes` + copy!(domtree.nodes[from].children, domtree.nodes[to].children) + domtree.nodes[to] = DomTreeNode() + # Recursively decrement level of the affected children + worklist = domtree.nodes[from].children + while !isempty(worklist) + node = pop!(worklist) + domtree.nodes[node] = DomTreeNode(domtree.nodes[node].level-1, + domtree.nodes[node].children) + foreach(child -> push!(worklist, child), + domtree.nodes[node].children) + end + + return domtree +end + +function combine_blocks!(D::DFSTree, from::BBNumber, to::BBNumber) + # Reassignment of preorder and postorder numbers depend on these + # conditions, which should hold if `from` has only one successor `to` and + # `to` has only one predecessor `from`. + @assert D.to_pre[from] + 1 == D.to_pre[to] + @assert D.to_post[from] == D.to_post[to] + 1 + + # Set preorder number of `to` to 0, + # decrement preorder numbers greater than original preorder number of `to`. + # Set postorder number of `to` to 0, + # decrement postorder numbers greater than original preorder number of `to`. + + orig_pre_of_to = D.to_pre[to] + orig_post_of_to = D.to_post[to] + + D.to_pre[to] = 0 + D.to_post[to] = 0 + for bb in 1:length(D.to_pre) + if D.to_pre[bb] > orig_pre_of_to + D.to_pre[bb] -= 1 + end + if D.to_post[bb] > orig_post_of_to + D.to_post[bb] -= 1 + end + end + deleteat!(D.from_pre, orig_pre_of_to) + deleteat!(D.from_post, orig_post_of_to) + + deleteat!(D.to_parent_pre, orig_pre_of_to) + for pre in 1:length(D.to_parent_pre) + if D.to_parent_pre[pre] > orig_pre_of_to + D.to_parent_pre[pre] -= 1 + end + end + + return D +end + """ Checks if bb1 dominates bb2. bb1 and bb2 are indexes into the CFG blocks. diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index 83205033342d6..5ad99c851742c 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -112,9 +112,8 @@ end function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code - @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks) defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst) - @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` + @timeit "construct_ssa" ir = construct_ssa!(ci, ir, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` return ir end @@ -123,7 +122,6 @@ function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState) ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, nargs, sv) ir = slot2reg(ir, ci, nargs, sv) #@Base.show ("after_construct", ir) - # TODO: Domsorting can produce an updated domtree - no need to recompute here @timeit "compact 1" ir = compact!(ir) @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds) #@timeit "verify 2" verify_ir(ir) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 0e95f812e5eb6..c923f5d9ce6f2 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -295,6 +295,7 @@ function finish_cfg_inline!(state::CFGInliningState) end end +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -415,6 +416,7 @@ end const fatal_type_bound_error = ErrorException("fatal error in type inference (type bound)") +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -497,7 +499,9 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, end function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool) - # Compute the new CFG first (modulo statement ranges, which will be computed below) + # Compute the new CFG first (modulo statement ranges, which will be + # computed below, and the domtree, which will be updated below before we + # iterate through the statements) state = CFGInliningState(ir) for (idx, item) in todo if isa(item, UnionSplit) @@ -519,6 +523,9 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect let compact = IncrementalCompact(ir, false) compact.result_bbs = state.new_cfg_blocks + # Recompute the domtree now that the CFG has been modified + compact.result_domtree = construct_domtree(compact.result_bbs) + # This needs to be a minimum and is more of a size hint nn = 0 for (_, item) in todo @@ -567,7 +574,6 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect compact[idx] = PhiNode(Int32[edge == length(state.bb_rename) ? length(state.new_cfg_blocks) : state.bb_rename[edge+1]-1 for edge in stmt.edges], stmt.values) end end - ir = finish(compact) end return ir diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 3960ab44649b1..2da91d3028372 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -9,23 +9,28 @@ struct CFG blocks::Vector{BasicBlock} index::Vector{Int} # map from instruction => basic-block number # TODO: make this O(1) instead of O(log(n_blocks))? + domtree::DomTree end -copy(c::CFG) = CFG(BasicBlock[copy(b) for b in c.blocks], copy(c.index)) +function CFG(blocks::Vector{BasicBlock}, index::Vector{Int}) + return CFG(blocks, index, construct_domtree(blocks)) +end + +copy(c::CFG) = CFG(BasicBlock[copy(b) for b in c.blocks], + copy(c.index), + copy(c.domtree)) function cfg_insert_edge!(cfg::CFG, from::Int, to::Int) # Assumes that this edge does not already exist push!(cfg.blocks[to].preds, from) push!(cfg.blocks[from].succs, to) + domtree_insert_edge!(cfg.domtree, cfg.blocks, from, to) nothing end function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) - preds = cfg.blocks[to].preds - succs = cfg.blocks[from].succs - # Assumes that blocks appear at most once in preds and succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + kill_edge!(cfg.blocks, from, to) + domtree_delete_edge!(cfg.domtree, cfg.blocks, from, to) nothing end @@ -507,8 +512,12 @@ mutable struct IncrementalCompact ir::IRCode result::InstructionStream result_bbs::Vector{BasicBlock} + result_domtree::DomTree ssa_rename::Vector{Any} + # `bb_rename_pred` is for renaming predecessors to the blocks they have + # been merged into (differs from `bb_rename_succ` only if blocks are + # combined), `bb_rename_succ` is how blocks are actually renamed bb_rename_pred::Vector{Int} bb_rename_succ::Vector{Int} @@ -530,7 +539,7 @@ mutable struct IncrementalCompact cfg_transforms_enabled::Bool fold_constant_branches::Bool - function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) + function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=true) # Sort by position with attach after nodes after regular ones perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] (new_node.pos * 2 + Int(new_node.attach_after)) @@ -542,9 +551,8 @@ mutable struct IncrementalCompact if allow_cfg_transforms bb_rename = Vector{Int}(undef, length(blocks)) cur_bb = 1 - domtree = construct_domtree(blocks) for i = 1:length(bb_rename) - if bb_unreachable(domtree, i) + if bb_unreachable(code.cfg.domtree, i) bb_rename[i] = -1 else bb_rename[i] = cur_bb @@ -568,20 +576,24 @@ mutable struct IncrementalCompact end end let blocks = blocks, bb_rename = bb_rename - result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] + result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) + if bb_rename[i] != -1] end + result_domtree = rename_blocks!(copy(code.cfg.domtree), bb_rename) else bb_rename = Vector{Int}() result_bbs = code.cfg.blocks + result_domtree = code.cfg.domtree end ssa_rename = Any[SSAValue(i) for i = 1:new_len] late_fixup = Vector{Int}() new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms) + return new(code, result, result_bbs, result_domtree, ssa_rename, + bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, + new_new_nodes, pending_nodes, pending_perm, 1, 1, 1, false, + allow_cfg_transforms, allow_cfg_transforms) end # For inlining @@ -595,11 +607,11 @@ mutable struct IncrementalCompact new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, parent.result, - parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas, - late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, result_offset, parent.active_result_bb, false, false, false) + return new(code, parent.result, parent.result_bbs, + parent.result_domtree, ssa_rename, bb_rename, bb_rename, + parent.used_ssas, late_fixup, perm, 1, new_new_nodes, + pending_nodes, pending_perm, 1, result_offset, + parent.active_result_bb, false, false, false) end end @@ -871,63 +883,82 @@ function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int) preds, succs = bbs[to].preds, bbs[from].succs deleteat!(preds, findfirst(x->x === from, preds)::Int) deleteat!(succs, findfirst(x->x === to, succs)::Int) - if length(preds) == 0 - for succ in copy(bbs[to].succs) - kill_edge!(bbs, to, succ) - end - end + nothing end # N.B.: from and to are non-renamed indices function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int) - # Note: We recursively kill as many edges as are obviously dead. However, this - # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or - # worstcase during codegen). - preds = compact.result_bbs[compact.bb_rename_succ[to]].preds - succs = compact.result_bbs[compact.bb_rename_pred[from]].succs - deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int) - deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int) - # Check if the block is now dead - if length(preds) == 0 - for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred)) + renamed_from = compact.bb_rename_pred[from] + renamed_to = compact.bb_rename_succ[to] + + preds = compact.result_bbs[renamed_to].preds + succs = compact.result_bbs[renamed_from].succs + + deleteat!(preds, findfirst(x -> x === renamed_from, preds)::Int) + deleteat!(succs, findfirst(x -> x === renamed_to, succs)::Int) + + domtree_delete_edge!(compact.result_domtree, + compact.result_bbs, + renamed_from, + renamed_to) + + # Recursively kill edges to dead blocks. This is not necessary for removing + # dead blocks with a subsequent `IncrementalCompact`, but killing all the + # statements in dead blocks allows passes not to have to worry about + # whether statements have become invalid by becoming dead. + if bb_unreachable(compact.result_domtree, renamed_to) + for succ in copy(compact.result_bbs[renamed_to].succs) + # Make sure edge hasn't already been killed in a previous iteration + # of this loop + if succ in compact.result_bbs[renamed_to].succs + kill_edge!(compact, active_bb, to, + findfirst(x -> x === succ, compact.bb_rename_pred)) + end end - if to < active_bb - # Kill all statements in the block - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + + # Kill all statements in dead successor if it has already been + # processed. Also kill statements if dead successor is the active BB, + # in order to kill statements that have already been processed, + # although statements in the current BB that have not been processed + # will be killed in `process_node!`. Statements in dead blocks that + # have not yet been processed (`to` > `active_bb`) will be killed in + # `process_node!`. + if to <= active_bb + stmts = compact.result_bbs[renamed_to].stmts for stmt in stmts compact.result[stmt][:inst] = nothing end compact.result[last(stmts)][:inst] = ReturnNode() end - else - # Remove this edge from all phi nodes in `to` block - # NOTE: It is possible for `to` to contain only `nothing` statements, - # so we must be careful to stop at its last statement - if to < active_bb - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts - idx = first(stmts) - while idx <= last(stmts) - stmt = compact.result[idx][:inst] - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x == compact.bb_rename_pred[from], stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end + end + + # Remove this edge from any phi nodes + if to < active_bb + stmts = compact.result_bbs[renamed_to].stmts + idx = first(stmts) + while idx <= last(stmts) + stmt = compact.result[idx][:inst] + if stmt === nothing idx += 1 + continue end - else - stmts = compact.ir.cfg.blocks[to].stmts - for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x == from, stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end + isa(stmt, PhiNode) || break + i = findfirst(x -> x === renamed_from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) + end + idx += 1 + end + else + stmts = compact.ir.cfg.blocks[to].stmts + for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) + stmt === nothing && continue + isa(stmt, PhiNode) || break + i = findfirst(x -> x === from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) end end end @@ -941,7 +972,19 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr late_fixup = compact.late_fixup used_ssas = compact.used_ssas ssa_rename[idx] = SSAValue(result_idx) - if stmt === nothing + if compact.cfg_transforms_enabled && + # Don't kill statements in blocks renamed to -1 because + # `cfg_simplify!` renames blocks that have been merged into others + # to -1, but the statements in them are not dead. Unreachable + # blocks detected in the constructor of `IncrementalCompact` are + # removed then, so we never encounter statements from them here. + compact.bb_rename_succ[active_bb] != -1 && + bb_unreachable(compact.result_domtree, + compact.bb_rename_succ[active_bb]) + # Kill statements in dead blocks + result[result_idx][:inst] = isterminator(stmt) ? ReturnNode() : nothing + result_idx += 1 + elseif stmt === nothing ssa_rename[idx] = stmt elseif isa(stmt, OldSSAValue) ssa_rename[idx] = ssa_rename[stmt.id] @@ -1244,7 +1287,6 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= # result_idx is not, incremented, but that's ok and expected compact.result[old_result_idx] = compact.ir.stmts[idx] result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx, active_bb, true) - stmt_if_any = old_result_idx == result_idx ? nothing : compact.result[old_result_idx][:inst] compact.result_idx = result_idx if idx == last(bb.stmts) && !attach_after_stmt_after(compact, idx) finish_current_bb!(compact, active_bb, old_result_idx) @@ -1385,11 +1427,13 @@ end function complete(compact::IncrementalCompact) result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1) - cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) + cfg = CFG(result_bbs, + Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)], + compact.result_domtree) return IRCode(compact.ir, compact.result, cfg, compact.new_new_nodes) end -function compact!(code::IRCode, allow_cfg_transforms::Bool=false) +function compact!(code::IRCode, allow_cfg_transforms::Bool=true) compact = IncrementalCompact(code, allow_cfg_transforms) # Just run through the iterator without any processing for _ in compact; end # _ isa Pair{Int, Any} diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 8ce0c01cf5f75..59071e46b5ffd 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -69,8 +69,8 @@ function val_for_def_expr(ir::IRCode, def::Int, fidx::Int) end end -function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) - curblock = find_curblock(domtree, allblocks, curblock) +function compute_value_for_block(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) + curblock = find_curblock(ir.cfg.domtree, allblocks, curblock) def = 0 for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock @@ -80,10 +80,10 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector def == 0 ? phinodes[curblock] : val_for_def_expr(ir, def, fidx) end -function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int) +function compute_value_for_use(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int) # Find the first dominating def curblock = stmtblock = block_for_inst(ir.cfg, use_idx) - curblock = find_curblock(domtree, allblocks, curblock) + curblock = find_curblock(ir.cfg.domtree, allblocks, curblock) defblockdefs = let curblock = curblock Int[stmt for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock] end @@ -107,7 +107,7 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I if !haskey(phinodes, curblock) # If this happens, we need to search the predecessors for defs. Which # one doesn't matter - if it did, we'd have had a phinode - return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) + return compute_value_for_block(ir, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) end # The use is the phinode return phinodes[curblock] @@ -723,13 +723,6 @@ function getfield_elim_pass!(ir::IRCode) used_ssas = copy(compact.used_ssas) simple_dce!(compact) ir = complete(compact) - - # Compute domtree, needed below, now that we have finished compacting the - # IR. This needs to be after we iterate through the IR with - # `IncrementalCompact` because removing dead blocks can invalidate the - # domtree. - @timeit "domtree 2" domtree = construct_domtree(ir.cfg.blocks) - # Now go through any mutable structs and see which ones we can eliminate for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) @@ -796,7 +789,7 @@ function getfield_elim_pass!(ir::IRCode) ldu = compute_live_ins(ir.cfg, du) phiblocks = Int[] if !isempty(ldu.live_in_bbs) - phiblocks = idf(ir.cfg, ldu, domtree) + phiblocks = idf(ir.cfg, ldu) end phinodes = IdDict{Int, SSAValue}() for b in phiblocks @@ -806,19 +799,19 @@ function getfield_elim_pass!(ir::IRCode) # Now go through all uses and rewrite them allblocks = sort(vcat(phiblocks, ldu.def_bbs)) for stmt in du.uses - ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt) + ir[SSAValue(stmt)] = compute_value_for_use(ir, allblocks, du, phinodes, fidx, stmt) end if !isbitstype(fieldtype(typ, fidx)) for (use, list) in preserve_uses - push!(list, compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use)) + push!(list, compute_value_for_use(ir, allblocks, du, phinodes, fidx, use)) end end for b in phiblocks for p in ir.cfg.blocks[b].preds n = ir[phinodes[b]] push!(n.edges, p) - push!(n.values, compute_value_for_block(ir, domtree, - allblocks, du, phinodes, fidx, p)) + push!(n.values, compute_value_for_block( + ir, allblocks, du, phinodes, fidx, p)) end end end @@ -1043,6 +1036,9 @@ end function cfg_simplify!(ir::IRCode) bbs = ir.cfg.blocks + old_domtree = ir.cfg.domtree + new_domtree = copy(old_domtree) + merge_into = zeros(Int, length(bbs)) merged_succ = zeros(Int, length(bbs)) function follow_merge_into(idx::Int) @@ -1066,6 +1062,13 @@ function cfg_simplify!(ir::IRCode) # Prevent cycles by making sure we don't end up back at `idx` # by following what is to be merged into `succ` if follow_merged_succ(succ) != idx + # Update domtree, by combining the block that `idx` is + # going to be merged into with the block that `succ` was + # going to be merged into (before this merge) + combine_blocks!(new_domtree, + follow_merge_into(idx), + follow_merge_into(succ)) + merge_into[succ] = idx merged_succ[idx] = succ end @@ -1077,12 +1080,8 @@ function cfg_simplify!(ir::IRCode) max_bb_num = 1 bb_rename_succ = zeros(Int, length(bbs)) for i = 1:length(bbs) - # Drop blocks that will be merged away - if merge_into[i] != 0 - bb_rename_succ[i] = -1 - end - # Drop blocks with no predecessors - if i != 1 && length(ir.cfg.blocks[i].preds) == 0 + # Drop blocks that will be merged away or are unreachable + if merge_into[i] != 0 || bb_unreachable(old_domtree, i) bb_rename_succ[i] = -1 end @@ -1171,6 +1170,7 @@ function cfg_simplify!(ir::IRCode) compact.bb_rename_succ = bb_rename_succ compact.bb_rename_pred = bb_rename_pred compact.result_bbs = cresult_bbs + compact.result_domtree = rename_blocks!(new_domtree, bb_rename_succ) result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 057bb72ff1152..ebede586f594d 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -269,10 +269,10 @@ end # subset of those backedges from a subtree rooted at `A` (out outside the subtree rooted # at `A`). Note however that this does not work the other way. Thus, the algorithm # needs to make sure that we always visit `B` before `A`. -function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) +function idf(cfg::CFG, liveness::BlockLiveness) # This should be a priority queue, but TODO - sorted array for now defs = liveness.def_bbs - pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)] + pq = Tuple{Int, Int}[(defs[i], cfg.domtree.nodes[defs[i]].level) for i in 1:length(defs)] sort!(pq, by=x->x[2]) phiblocks = Int[] # This bitset makes sure we only add a phi node to a given block once. @@ -296,7 +296,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) # since at this point we know that there is an edge from `node`'s # subtree to `succ`, we know that if succ's level is greater than # that of `node`, it must be dominated by `node`. - succ_level = domtree.nodes[succ].level + succ_level = cfg.domtree.nodes[succ].level succ_level > level && continue # We don't dominate succ. We need to place a phinode, # unless liveness said otherwise. @@ -317,7 +317,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) end end # Recurse down the current subtree - for child in domtree.nodes[active].children + for child in cfg.domtree.nodes[active].children child in visited && continue push!(visited, child) push!(worklist, child) @@ -373,7 +373,7 @@ end RPO traversal and in particular, any use of an SSA value must come after (by linear order) its definition. """ -function domsort_ssa!(ir::IRCode, domtree::DomTree) +function domsort_ssa!(ir::IRCode) # First compute the new order of basic blocks result_order = Int[] stack = Int[] @@ -382,9 +382,8 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) nnewfallthroughs = 0 while node !== -1 push!(result_order, node) - cs = domtree.nodes[node].children + cs = ir.cfg.domtree.nodes[node].children terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst] - iscondbr = isa(terminator, GotoIfNot) let old_node = node + 1 if length(cs) >= 1 # Adding the nodes in reverse sorted order attempts to retain @@ -501,7 +500,17 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) for i in 1:length(result) result[i][:inst] = renumber_ssa!(result[i][:inst], inst_rename, true) end - cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]]) + # Recompute domtree from scratch because adding the new nodes for retaining + # fallthrough can be a significant change to the CFG, and recomputing the + # whole thing is probably faster than making many updates. If not for these + # new nodes, we could use + # + # domtree = rename_blocks!( + # ir.cfg.domtree, + # Int[haskey(bb_rename, old_bb) ? bb_rename[old_bb] : -1 + # for old_bb in 1:length(ir.cfg.blocks)]) + domtree = construct_domtree(new_bbs) + cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]], domtree) new_new_nodes = NewNodeStream(length(ir.new_nodes)) for i = 1:length(ir.new_nodes) new_info = ir.new_nodes.info[i] @@ -581,8 +590,8 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode return new_typ end -function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int, sptypes::Vector{Any}, - slottypes::Vector{Any}) +function construct_ssa!(ci::CodeInfo, ir::IRCode, defuse, nargs::Int, + sptypes::Vector{Any}, slottypes::Vector{Any}) code = ir.stmts.inst cfg = ir.cfg left = Int[] @@ -599,7 +608,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg for (enter_block, exc) in catch_entry_blocks exc_handlers[enter_block+1] = (enter_block, exc) # TODO: Cut off here if the terminator is a leave corresponding to this enter - for block in dominated(domtree, enter_block+1) + for block in dominated(cfg.domtree, enter_block+1) exc_handlers[block] = (enter_block, exc) end end @@ -651,7 +660,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg end end end - phiblocks = idf(cfg, live, domtree) + phiblocks = idf(cfg, live) for block in phiblocks push!(phi_slots[block], idx) node = PhiNode() @@ -895,6 +904,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg local node = new_nodes.stmts[i] node[:inst] = new_to_regular(renumber_ssa!(node[:inst], ssavalmap), nstmts) end - @timeit "domsort" ir = domsort_ssa!(ir, domtree) + @timeit "domsort" ir = domsort_ssa!(ir) return ir end diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index 1d64a77444d47..c60390ed29c91 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -11,7 +11,7 @@ if !isdefined(@__MODULE__, Symbol("@verify_error")) end end -function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool) +function check_op(ir::IRCode, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool) if isa(op, SSAValue) if op.id > length(ir.stmts) def_bb = block_for_inst(ir.cfg, ir.new_nodes[op.id - length(ir.stmts)].pos) @@ -28,7 +28,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, end end else - if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb)) + if !dominates(ir.cfg.domtree, def_bb, use_bb) && !(bb_unreachable(ir.cfg.domtree, def_bb) && bb_unreachable(ir.cfg.domtree, use_bb)) # At the moment, we allow GC preserve tokens outside the standard domination notion #@Base.show ir @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))" @@ -61,12 +61,23 @@ function count_int(val::Int, arr::Vector{Int}) end function verify_ir(ir::IRCode, print::Bool=true) + # Verify domtree + if ir.cfg.domtree.idoms_bb != construct_domtree(ir.cfg.blocks).idoms_bb + # Dynamically updating the domtree made it incorrect? + @verify_error "Dominator tree does not match recomputed one" + error() + end + # Verify domtree by comparing with naive algorithm, but only if there + # aren't too many blocks for it to handle + if length(ir.cfg.blocks) < 1000 && ir.cfg.domtree.idoms_bb != naive_idoms(ir.cfg.blocks) + @verify_error "Dominator tree does not match immediate dominators computed by naive algorithm" + error() + end # For now require compact IR # @assert isempty(ir.new_nodes) # Verify CFG last_end = 0 # Verify statements - domtree = construct_domtree(ir.cfg.blocks) for (idx, block) in pairs(ir.cfg.blocks) if first(block.stmts) != last_end + 1 #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] @@ -76,7 +87,7 @@ function verify_ir(ir::IRCode, print::Bool=true) last_end = last(block.stmts) terminator = ir.stmts[last_end][:inst] - bb_unreachable(domtree, idx) && continue + bb_unreachable(ir.cfg.domtree, idx) && continue for p in block.preds p == 0 && continue c = count_int(idx, ir.cfg.blocks[p].succs) @@ -144,7 +155,7 @@ function verify_ir(ir::IRCode, print::Bool=true) for (bb, idx) in bbidxiter(ir) # We allow invalid IR in dead code to avoid passes having to detect when # they're generating dead code. - bb_unreachable(domtree, bb) && continue + bb_unreachable(ir.cfg.domtree, bb) && continue stmt = ir.stmts[idx][:inst] stmt === nothing && continue if isa(stmt, PhiNode) @@ -174,7 +185,7 @@ function verify_ir(ir::IRCode, print::Bool=true) @verify_error "GlobalRefs and Exprs are not allowed as PhiNode values" error() end - check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print) + check_op(ir, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print) end elseif isa(stmt, PhiCNode) for i = 1:length(stmt.values) @@ -214,7 +225,7 @@ function verify_ir(ir::IRCode, print::Bool=true) end for op in userefs(stmt) op = op[] - check_op(ir, domtree, op, bb, idx, print) + check_op(ir, op, bb, idx, print) end end end diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 887bd4cde2614..ef46f882307d2 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -161,7 +161,7 @@ function f_ifelse(x) return b ? x + 1 : x end # 2 for now because the compiler leaves a GotoNode around -@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 +@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 # Test that inlining of _apply properly hits the inference cache @noinline cprop_inline_foo1() = (1, 1) diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 855100c8de85f..d8c86932265e6 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -34,8 +34,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) - ir = Core.Compiler.domsort_ssa!(ir, domtree) + ir = Core.Compiler.domsort_ssa!(ir) Core.Compiler.verify_ir(ir) phi = ir.stmts.inst[3] @test isa(phi, Core.PhiNode) && length(phi.edges) == 1 @@ -62,8 +61,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) - ir = Core.Compiler.domsort_ssa!(ir, domtree) + ir = Core.Compiler.domsort_ssa!(ir) Core.Compiler.verify_ir(ir) end diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index f90bb71e291d0..b825440ffb5c1 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -70,7 +70,7 @@ let cfg = CFG(BasicBlock[ dfs = Compiler.DFS(cfg.blocks) @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 let correct_idoms = Compiler.naive_idoms(cfg.blocks) - @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms + @test cfg.domtree.idoms_bb == correct_idoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). @@ -217,12 +217,14 @@ end # nodes let ci = make_ci([ # Block 1 - Core.Compiler.GotoNode(3), - # Block 2 (no predecessors) - Core.Compiler.ReturnNode(3), - # Block 3 - Core.PhiNode(Int32[1, 2], Any[100, 200]), - Core.Compiler.ReturnNode(Core.SSAValue(3)) + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 4), + # Block 2 + Core.Compiler.GotoNode(4), + # Block 3 (no predecessors) + Core.Compiler.GotoNode(4), + # Block 4 + Core.PhiNode(Int32[1, 2, 3], Any[100, 200, 300]), + Core.Compiler.ReturnNode(Core.SSAValue(4)) ]) ir = Core.Compiler.inflate_ir(ci) ir = Core.Compiler.compact!(ir, true) @@ -260,53 +262,98 @@ let cfg = CFG(BasicBlock[ make_bb([2, 6], []), make_bb([4], [5, 3]), ], Int[]) - domtree = Compiler.construct_domtree(cfg.blocks) - @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test removal of edge between a parent and child in the DFS tree, which # should trigger complete recomputation of domtree (first case in algorithm # for removing edge from domtree dynamically) Compiler.cfg_delete_edge!(cfg, 2, 5) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4] # Add edge back (testing first case for insertion) Compiler.cfg_insert_edge!(cfg, 2, 5) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test second case in algorithm for removing edges from domtree, in which # `from` is on a semidominator path from the semidominator of `to` to `to` Compiler.cfg_delete_edge!(cfg, 6, 5) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4] # Add edge back (testing second case for insertion) Compiler.cfg_insert_edge!(cfg, 6, 5) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test last case for removing edges, in which edge does not satisfy either # of the above conditions Compiler.cfg_delete_edge!(cfg, 6, 3) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Add edge back (testing second case for insertion) Compiler.cfg_insert_edge!(cfg, 6, 3) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Try removing all edges from root Compiler.cfg_delete_edge!(cfg, 1, 2) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4] Compiler.cfg_delete_edge!(cfg, 1, 3) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0] # Add edges back Compiler.cfg_insert_edge!(cfg, 1, 2) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0] Compiler.cfg_insert_edge!(cfg, 1, 3) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] +end + +# Make sure killing edges while iterating through an `IncrementalCompact` kills +# all the statements in the dead blocks too. +# +# Block that becomes unreachable is before the active BB +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoNode(4), + # Block 2 + Core.Compiler.PhiNode(Any[2, 4], Any[100, 200]), + Core.Compiler.GotoNode(2), + # Block 3 + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 2), + # Block 4 + Core.Compiler.ReturnNode(0) + ]) + ir = Core.Compiler.inflate_ir(ci) + compact = Core.Compiler.IncrementalCompact(ir, true) + Core.Compiler.foreach(x -> begin + # Delete edge to block 2 when at statement 4 + if x.first == 4 + Core.Compiler.kill_edge!( + compact, compact.active_result_bb, 3, 2) + compact.result[4] = Core.Compiler.GotoNode(4) + end + end, compact) + ir = Core.Compiler.finish(compact) + @test Core.Compiler.verify_ir(ir) === nothing + @test ir.stmts[2][:inst] === nothing +end +# Block that becomes unreachable is after the active BB +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 3), + # Block 2 + Core.Compiler.GotoNode(5), + # Block 3 + Core.Compiler.PhiNode(Any[2, 3], Any[100, 200]), + Core.Compiler.GotoNode(3), + # Block 4 + Core.Compiler.ReturnNode(0) + ]) + ir = Core.Compiler.inflate_ir(ci) + compact = Core.Compiler.IncrementalCompact(ir, true) + Core.Compiler.foreach(x -> begin + # Delete edge to block 3 when at statement 1 + if x.first == 1 + Core.Compiler.kill_edge!( + compact, compact.active_result_bb, 1, 3) + compact.result[1] = Core.Compiler.GotoNode(2) + end + end, compact) + ir = Core.Compiler.finish(compact) + @test Core.Compiler.verify_ir(ir) === nothing + @test ir.stmts[3][:inst] === nothing end