From bb6e299c6e2db9e8a0aaad024826c93e171c868f Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Mon, 14 Oct 2019 20:49:52 -0400 Subject: [PATCH 01/16] Decline to remove phi nodes when this might cause forward references Phi nodes are optimized away when there is only one predecessor, but this can cause problems in dead loops because forward references can be created, leading to issues with optimization passes that look at all code, dead or not. This fixes issue #29107 when DCE is turned on. --- base/compiler/ssair/ir.jl | 17 +++++++++++++++-- test/compiler/ssair.jl | 40 +++++++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 4 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 621072527a334..95cdcc15b788b 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -500,9 +500,11 @@ mutable struct IncrementalCompact ir::IRCode result::InstructionStream result_bbs::Vector{BasicBlock} + ssa_rename::Vector{Any} bb_rename_pred::Vector{Int} bb_rename_succ::Vector{Int} + used_ssas::Vector{Int} late_fixup::Vector{Int} perm::Vector{Int} @@ -512,6 +514,7 @@ mutable struct IncrementalCompact # TODO: Switch these two to a min-heap of some sort pending_nodes::NewNodeStream # New nodes that were after the compaction point at insertion time pending_perm::Vector{Int} + # State idx::Int result_idx::Int @@ -519,6 +522,7 @@ mutable struct IncrementalCompact renamed_new_nodes::Bool cfg_transforms_enabled::Bool fold_constant_branches::Bool + function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) # Sort by position with attach after nodes after regular ones perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] @@ -871,7 +875,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: # Note: We recursively kill as many edges as are obviously dead. However, this # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or # worstcase during codegen). - preds, succs = compact.result_bbs[compact.bb_rename_succ[to]].preds, compact.result_bbs[compact.bb_rename_pred[from]].succs + preds = compact.result_bbs[compact.bb_rename_succ[to]].preds + succs = compact.result_bbs[compact.bb_rename_pred[from]].succs deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int) deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int) # Check if the block is now dead @@ -985,7 +990,15 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr result_idx += 1 elseif isa(stmt, PhiNode) values = process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa) - if length(stmt.edges) == 1 && isassigned(values, 1) && + # Don't remove the phi node if it is before the definition of its value + # because doing so can create forward references. This should only + # happen with dead loops, but can cause problems when optimization + # passes look at all code, dead or not. This check should be + # unnecessary when DCE can remove those dead loops entirely, so this is + # just to be safe. + before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) && + idx < values[1].id + if length(stmt.edges) == 1 && isassigned(values, 1) && !before_def && length(compact.cfg_transforms_enabled ? compact.result_bbs[compact.bb_rename_succ[active_bb]].preds : compact.ir.cfg.blocks[active_bb].preds) == 1 diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 3f7b9b02a728f..657d7a4224aae 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -1,5 +1,6 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +using Base.Meta using Core.IR const Compiler = Core.Compiler @@ -135,7 +136,6 @@ end @test f32579(0, false) === false # Test for bug caused by renaming blocks improperly, related to PR #32145 -using Base.Meta let ci = (Meta.@lower 1 + 1).args[1] ci.code = [ # block 1 @@ -166,7 +166,6 @@ let ci = (Meta.@lower 1 + 1).args[1] end # Test that GlobalRef in value position is non-canonical -using Base.Meta let ci = (Meta.@lower 1 + 1).args[1] ci.code = [ Expr(:call, GlobalRef(Main, :something_not_defined_please)) @@ -180,3 +179,40 @@ let ci = (Meta.@lower 1 + 1).args[1] ir = Core.Compiler.compact!(ir, true) @test_throws ErrorException Core.Compiler.verify_ir(ir, false) end + +# Issue #29107 +let ci = (Meta.@lower 1 + 1).args[1] + ci.code = [ + # Block 1 + Core.Compiler.GotoNode(6), + # Block 2 + # The following phi node gets deleted because it only has one edge, so + # the call to `something` is made to use the value of `something2()`, + # even though this value is defined after it. We don't want this to + # happen even though this block is dead because subsequent optimization + # passes may look at all code, dead or not. + Core.PhiNode(Any[2], Any[Core.SSAValue(4)]), + Expr(:call, :something, Core.SSAValue(2)), + Expr(:call, :something2), + Core.Compiler.GotoNode(2), + # Block 3 + Core.Compiler.ReturnNode(1000) + ] + nstmts = length(ci.code) + ci.ssavaluetypes = nstmts + ci.codelocs = fill(Int32(1), nstmts) + ci.ssaflags = fill(Int32(0), nstmts) + ir = Core.Compiler.inflate_ir(ci) + ir = Core.Compiler.compact!(ir, true) + # Make sure that if there is a call to `something` (block 2 should be + # removed entirely with working DCE), it doesn't use any SSA values that + # come after it. + for i in 1:length(ir.stmts) + s = ir.stmts[i] + if isa(s, Expr) && s.head == :call && s.args[1] == :something + if isa(s.args[2], SSAValue) + @test s.args[2].id <= i + end + end + end +end From 714e28c1fceaf53ed64270e04532f566ae8d4700 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Tue, 29 Oct 2019 16:08:08 -0400 Subject: [PATCH 02/16] Remove edges from dead blocks from phi nodes --- base/compiler/ssair/driver.jl | 3 +-- base/compiler/ssair/ir.jl | 39 +++++++++++++++++++++++++--- base/compiler/ssair/passes.jl | 9 ++++++- test/compiler/irpasses.jl | 3 +-- test/compiler/ssair.jl | 49 +++++++++++++++++++++++------------ 5 files changed, 79 insertions(+), 24 deletions(-) diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index 0ff6943aa5408..cb8e9ce01d902 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -128,8 +128,7 @@ function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState) #@timeit "verify 2" verify_ir(ir) ir = compact!(ir) #@Base.show ("before_sroa", ir) - @timeit "domtree 2" domtree = construct_domtree(ir.cfg) - @timeit "SROA" ir = getfield_elim_pass!(ir, domtree) + @timeit "SROA" ir = getfield_elim_pass!(ir) #@Base.show ir.new_nodes #@Base.show ("after_sroa", ir) ir = adce_pass!(ir) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 95cdcc15b788b..9fb4ff6745188 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -989,7 +989,41 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa) result_idx += 1 elseif isa(stmt, PhiNode) - values = process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa) + if compact.cfg_transforms_enabled + # Rename phi node edges + map!(i -> compact.bb_rename_pred[i], stmt.edges, stmt.edges) + + # Remove edges and values associated with dead blocks. Entries in + # `values` can be undefined when the phi node refers to something + # that is not defined. (This is not a sign that the compiler is + # unintentionally leaving some entries in `values` uninitialized.) + # For example, consider a reference to a variable that is only + # defined if some branch is taken. + # + # In order to leave undefined values undefined (undefined-ness is + # not a value we can copy), we copy only the edges and (defined) + # values we want to keep to new arrays initialized with undefined + # elements. + edges = Vector{Any}(undef, length(stmt.edges)) + values = Vector{Any}(undef, length(stmt.values)) + new_index = 1 + for old_index in 1:length(stmt.edges) + if stmt.edges[old_index] != -1 + edges[new_index] = stmt.edges[old_index] + if isassigned(stmt.values, old_index) + values[new_index] = stmt.values[old_index] + end + new_index += 1 + end + end + resize!(edges, new_index-1) + resize!(values, new_index-1) + else + edges = stmt.edges + values = stmt.values + end + + values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa) # Don't remove the phi node if it is before the definition of its value # because doing so can create forward references. This should only # happen with dead loops, but can cause problems when optimization @@ -998,14 +1032,13 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr # just to be safe. before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) && idx < values[1].id - if length(stmt.edges) == 1 && isassigned(values, 1) && !before_def && + if length(edges) == 1 && isassigned(values, 1) && !before_def && length(compact.cfg_transforms_enabled ? compact.result_bbs[compact.bb_rename_succ[active_bb]].preds : compact.ir.cfg.blocks[active_bb].preds) == 1 # There's only one predecessor left - just replace it ssa_rename[idx] = values[1] else - edges = compact.cfg_transforms_enabled ? map!(i->compact.bb_rename_pred[i], stmt.edges, stmt.edges) : stmt.edges result[result_idx][:inst] = PhiNode(edges, values) result_idx += 1 end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index a2dc6d6e75f60..8e4cda9220b1c 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -524,7 +524,7 @@ function perform_lifting!(compact::IncrementalCompact, end assertion_counter = 0 -function getfield_elim_pass!(ir::IRCode, domtree::DomTree) +function getfield_elim_pass!(ir::IRCode) compact = IncrementalCompact(ir) insertions = Vector{Any}() defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}() @@ -721,6 +721,13 @@ function getfield_elim_pass!(ir::IRCode, domtree::DomTree) used_ssas = copy(compact.used_ssas) simple_dce!(compact) ir = complete(compact) + + # Compute domtree, needed below, now that we have finished compacting the + # IR. This needs to be after we iterate through the IR with + # `IncrementalCompact` because removing dead blocks can invalidate the + # domtree. + @timeit "domtree 2" domtree = construct_domtree(ir.cfg) + # Now go through any mutable structs and see which ones we can eliminate for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index be7539cac4d9f..e4b9d970e9293 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -180,8 +180,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any]) @test Core.Compiler.verify_ir(ir) === nothing - domtree = Core.Compiler.construct_domtree(ir.cfg) - ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir, domtree) + ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir) @test Core.Compiler.verify_ir(ir) === nothing end diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 657d7a4224aae..e61d4b0f8d5d1 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -3,6 +3,19 @@ using Base.Meta using Core.IR const Compiler = Core.Compiler +using .Compiler: CFG, BasicBlock + +make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) + +function make_ci(code) + ci = (Meta.@lower 1 + 1).args[1] + ci.code = code + nstmts = length(ci.code) + ci.ssavaluetypes = nstmts + ci.codelocs = fill(Int32(1), nstmts) + ci.ssaflags = fill(Int32(0), nstmts) + return ci +end # TODO: this test is broken #let code = Any[ @@ -29,7 +42,6 @@ const Compiler = Core.Compiler #end # Issue #31121 -using .Compiler: CFG, BasicBlock # We have the following CFG and corresponding DFS numbering: # @@ -48,7 +60,6 @@ using .Compiler: CFG, BasicBlock # than `3`, so the idom search missed that `1` is `3`'s semi-dominator). Here # we manually construct that CFG and verify that the DFS records the correct # parent. -make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs) let cfg = CFG(BasicBlock[ make_bb([] , [2, 3]), make_bb([1] , [4, 5]), @@ -136,8 +147,7 @@ end @test f32579(0, false) === false # Test for bug caused by renaming blocks improperly, related to PR #32145 -let ci = (Meta.@lower 1 + 1).args[1] - ci.code = [ +let ci = make_ci([ # block 1 Core.Compiler.GotoIfNot(Expr(:boundscheck), 6), # block 2 @@ -155,11 +165,7 @@ let ci = (Meta.@lower 1 + 1).args[1] Core.Compiler.ReturnNode(Core.SSAValue(8)), # block 6 Core.Compiler.ReturnNode(Core.SSAValue(8)) - ] - nstmts = length(ci.code) - ci.ssavaluetypes = nstmts - ci.codelocs = fill(Int32(1), nstmts) - ci.ssaflags = fill(Int32(0), nstmts) + ]) ir = Core.Compiler.inflate_ir(ci) ir = Core.Compiler.compact!(ir, true) @test Core.Compiler.verify_ir(ir) == nothing @@ -181,8 +187,7 @@ let ci = (Meta.@lower 1 + 1).args[1] end # Issue #29107 -let ci = (Meta.@lower 1 + 1).args[1] - ci.code = [ +let ci = make_ci([ # Block 1 Core.Compiler.GotoNode(6), # Block 2 @@ -197,11 +202,7 @@ let ci = (Meta.@lower 1 + 1).args[1] Core.Compiler.GotoNode(2), # Block 3 Core.Compiler.ReturnNode(1000) - ] - nstmts = length(ci.code) - ci.ssavaluetypes = nstmts - ci.codelocs = fill(Int32(1), nstmts) - ci.ssaflags = fill(Int32(0), nstmts) + ]) ir = Core.Compiler.inflate_ir(ci) ir = Core.Compiler.compact!(ir, true) # Make sure that if there is a call to `something` (block 2 should be @@ -216,3 +217,19 @@ let ci = (Meta.@lower 1 + 1).args[1] end end end + +# Make sure dead blocks that are removed are not still referenced in live phi +# nodes +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoNode(3), + # Block 2 (no predecessors) + Core.Compiler.ReturnNode(3), + # Block 3 + Core.PhiNode(Any[1, 2], Any[100, 200]), + Core.Compiler.ReturnNode(Core.SSAValue(3)) + ]) + ir = Core.Compiler.inflate_ir(ci) + ir = Core.Compiler.compact!(ir, true) + @test Core.Compiler.verify_ir(ir) == nothing +end From 1c9d4988a2396910573a05b436846ee2398e3a34 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Mon, 25 Nov 2019 13:25:08 -0500 Subject: [PATCH 03/16] Fix possible merge cycle bug in `cfg_simplify!` --- base/compiler/ssair/passes.jl | 101 +++++++++++++++++++--------------- test/compiler/irpasses.jl | 24 ++++++++ 2 files changed, 81 insertions(+), 44 deletions(-) diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 8e4cda9220b1c..c53e8660d7887 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1041,30 +1041,49 @@ function cfg_simplify!(ir::IRCode) bbs = ir.cfg.blocks merge_into = zeros(Int, length(bbs)) merged_succ = zeros(Int, length(bbs)) + function follow_merge_into(idx::Int) + while merge_into[idx] != 0 + idx = merge_into[idx] + end + return idx + end + function follow_merged_succ(idx::Int) + while merged_succ[idx] != 0 + idx = merged_succ[idx] + end + return idx + end - # Walk the CFG at from the entry block and aggressively combine blocks + # Walk the CFG from the entry block and aggressively combine blocks for (idx, bb) in enumerate(bbs) if length(bb.succs) == 1 succ = bb.succs[1] if length(bbs[succ].preds) == 1 - merge_into[succ] = idx - merged_succ[idx] = succ + # Prevent cycles by making sure we don't end up back at `idx` + # by following what is to be merged into `succ` + if follow_merged_succ(succ) != idx + merge_into[succ] = idx + merged_succ[idx] = succ + end end end end + + # Assign new BB numbers max_bb_num = 1 bb_rename_succ = zeros(Int, length(bbs)) - # Lay out the basic blocks for i = 1:length(bbs) + # Drop blocks that will be merged away if merge_into[i] != 0 bb_rename_succ[i] = -1 - continue end - # Drop unreachable blocks + # Drop blocks with no predecessors if i != 1 && length(ir.cfg.blocks[i].preds) == 0 bb_rename_succ[i] = -1 end + bb_rename_succ[i] != 0 && continue + curr = i while true bb_rename_succ[curr] = max_bb_num @@ -1072,9 +1091,7 @@ function cfg_simplify!(ir::IRCode) # Now walk the chain of blocks we merged. # If we end in something that may fall through, # we have to schedule that block next - while merged_succ[curr] != 0 - curr = merged_succ[curr] - end + curr = follow_merged_succ(curr) terminator = ir.stmts[ir.cfg.blocks[curr].stmts[end]][:inst] if isa(terminator, GotoNode) || isa(terminator, ReturnNode) break @@ -1082,19 +1099,24 @@ function cfg_simplify!(ir::IRCode) curr += 1 end end + + # Figure out how predecessors should be renamed bb_rename_pred = zeros(Int, length(bbs)) for i = 1:length(bbs) if merged_succ[i] != 0 + # Block `i` should no longer be a predecessor (before renaming) + # because it is being merged with its sole successor bb_rename_pred[i] = -1 continue end - bbnum = i - while merge_into[bbnum] != 0 - bbnum = merge_into[bbnum] - end + bbnum = follow_merge_into(i) bb_rename_pred[i] = bb_rename_succ[bbnum] end + + # Compute map from new to old blocks result_bbs = Int[findfirst(j->i==j, bb_rename_succ) for i = 1:max_bb_num-1] + + # Compute new block lengths result_bbs_lengths = zeros(Int, max_bb_num-1) for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb @@ -1103,41 +1125,32 @@ function cfg_simplify!(ir::IRCode) ms = merged_succ[ms] end end + + # Compute statement indices the new blocks start at bb_starts = Vector{Int}(undef, 1+length(result_bbs_lengths)) bb_starts[1] = 1 for i = 1:length(result_bbs_lengths) bb_starts[i+1] = bb_starts[i] + result_bbs_lengths[i] end - # Figure out the pred and succ lists for each basic block in our merged result - cresult_bbs = let result_bbs = result_bbs, - merged_succ = merged_succ, - merge_into = merge_into, - bbs = bbs, - bb_rename_succ = bb_rename_succ - function compute_succs(i) - # Look at the original successor - orig_bb = result_bbs[i] - while merged_succ[orig_bb] != 0 - orig_bb = merged_succ[orig_bb] - end - newsuccs = map(i->bb_rename_succ[i], bbs[orig_bb].succs) - return newsuccs - end - function compute_preds(i) - orig_bb = result_bbs[i] - preds = bbs[orig_bb].preds - newpreds = map(preds) do pred - while merge_into[pred] != 0 - pred = merge_into[pred] - end - bb_rename_succ[pred] - end - return newpreds - end - BasicBlock[BasicBlock( - StmtRange(bb_starts[i], i + 1 > length(bb_starts) ? length(compact.result) : bb_starts[i + 1] - 1), - compute_preds(i), compute_succs(i)) for i = 1:length(result_bbs)] - end + + # Compute (renamed) successors and predecessors given (renamed) block + function compute_succs(i) + orig_bb = follow_merged_succ(result_bbs[i]) + return map(i -> bb_rename_succ[i], bbs[orig_bb].succs) + end + function compute_preds(i) + orig_bb = result_bbs[i] + preds = bbs[orig_bb].preds + return map(pred -> bb_rename_pred[pred], preds) + end + + cresult_bbs = BasicBlock[ + BasicBlock(StmtRange(bb_starts[i], + i+1 > length(bb_starts) ? + length(compact.result) : bb_starts[i+1]-1), + compute_preds(i), + compute_succs(i)) + for i = 1:length(result_bbs)] compact = IncrementalCompact(ir, true) # Run instruction compaction to produce the result, # but we're messing with the CFG @@ -1146,6 +1159,7 @@ function cfg_simplify!(ir::IRCode) compact.bb_rename_succ = bb_rename_succ compact.bb_rename_pred = bb_rename_pred compact.result_bbs = cresult_bbs + result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb while ms != 0 @@ -1165,7 +1179,6 @@ function cfg_simplify!(ir::IRCode) ms = merged_succ[ms] end end - compact.active_result_bb = length(bb_starts) return finish(compact) end diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index e4b9d970e9293..34bd185eb2e73 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -256,6 +256,30 @@ let m = Meta.@lower 1 + 1 @test isa(ret_2, Core.Compiler.ReturnNode) && ret_2.val == 2 end +let m = Meta.@lower 1 + 1 + # Test that CFG simplify doesn't try to merge every block in a loop into + # its predecessor + @assert Meta.isexpr(m, :thunk) + src = m.args[1]::Core.CodeInfo + src.code = Any[ + # Block 1 + Core.Compiler.GotoNode(2), + # Block 2 + Core.Compiler.GotoNode(3), + # Block 3 + Core.Compiler.GotoNode(1) + ] + nstmts = length(src.code) + src.ssavaluetypes = nstmts + src.codelocs = fill(Int32(1), nstmts) + src.ssaflags = fill(Int32(0), nstmts) + ir = Core.Compiler.inflate_ir(src) + Core.Compiler.verify_ir(ir) + ir = Core.Compiler.cfg_simplify!(ir) + Core.Compiler.verify_ir(ir) + @test length(ir.cfg.blocks) == 1 +end + # Issue #29213 function f_29213() while true From 32fbdceb500c0453c6ee83a3db0c3e3c59e320d0 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Tue, 26 Nov 2019 16:34:43 -0500 Subject: [PATCH 04/16] Fix bug in `kill_edge!` which caused edges to be removed from the wrong phi nodes --- base/compiler/ssair/ir.jl | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 9fb4ff6745188..7a969a88fa444 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -893,10 +893,13 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: compact.result[last(stmts)][:inst] = ReturnNode() end else - # We need to remove this edge from any phi nodes + # Remove this edge from all phi nodes in `to` block + # NOTE: It is possible for `to` to contain only `nothing` statements, + # so we must be careful to stop at its last statement if to < active_bb - idx = first(compact.result_bbs[compact.bb_rename_succ[to]].stmts) - while idx < length(compact.result) + stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + idx = first(stmts) + while idx <= last(stmts) stmt = compact.result[idx][:inst] stmt === nothing && continue isa(stmt, PhiNode) || break @@ -908,8 +911,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: idx += 1 end else - idx = first(compact.ir.cfg.blocks[to].stmts) - for stmt in CompactPeekIterator(compact, idx) + stmts = compact.ir.cfg.blocks[to].stmts + for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) stmt === nothing && continue isa(stmt, PhiNode) || break i = findfirst(x-> x === from, stmt.edges) @@ -1131,10 +1134,19 @@ end struct CompactPeekIterator compact::IncrementalCompact start_idx::Int + end_idx::Int +end + +function CompactPeekIterator(compact::IncrementalCompact, start_idx::Int) + return CompactPeekIterator(compact, start_idx, 0) end entry_at_idx(entry::NewNodeInfo, idx::Int) = entry.attach_after ? entry.pos == idx - 1 : entry.pos == idx function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it.start_idx, it.compact.new_nodes_idx, 1)) + if it.end_idx > 0 && idx > it.end_idx + return nothing + end + # TODO: Take advantage of the fact that these arrays are sorted # TODO: this return value design is horrible compact = it.compact From 425939e4a827127581768139c32bd9988470af20 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Sat, 14 Dec 2019 17:21:46 -0500 Subject: [PATCH 05/16] Fix bug in which uses of SSA values processed in `just_fixup!` were not counted This fixes #29253, which was caused by `simple_dce!` erroneously erasing SSA values that did not appear to be used, because these uses were only discovered in `just_fixup!` at the end of iterating over an `IncrementalCompact`. --- base/compiler/ssair/ir.jl | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 7a969a88fa444..7de77542cb72b 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -283,11 +283,13 @@ function setindex!(x::IRCode, @nospecialize(repl), s::SSAValue) return x end - +# SSA values that need renaming struct OldSSAValue id::Int end +# SSA values that are in `new_new_nodes` of an `IncrementalCompact` and are to +# be actually inserted next time (they become `new_nodes` next time) struct NewSSAValue id::Int end @@ -824,7 +826,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int} return values end -function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int}, do_rename_ssa::Bool) +function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, do_rename_ssa::Bool) id = val.id if id > length(ssanums) return val @@ -833,14 +835,14 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int val = ssanums[id] end if isa(val, SSAValue) - if used_ssa !== nothing - used_ssa[val.id] += 1 + if used_ssas !== nothing + used_ssas[val.id] += 1 end end return val end -function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) +function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool) urs = userefs(stmt) for op in urs val = op[] @@ -848,7 +850,7 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vec push!(late_fixup, result_idx) end if isa(val, SSAValue) - val = renumber_ssa2(val, ssanums, used_ssa, do_rename_ssa) + val = renumber_ssa2(val, ssanums, used_ssas, do_rename_ssa) end if isa(val, OldSSAValue) || isa(val, NewSSAValue) push!(late_fixup, result_idx) @@ -1308,10 +1310,17 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt)) for ur in urs val = ur[] if isa(val, NewSSAValue) - ur[] = SSAValue(length(compact.result) + val.id) + val = SSAValue(length(compact.result) + val.id) elseif isa(val, OldSSAValue) - ur[] = compact.ssa_rename[val.id] + val = compact.ssa_rename[val.id] + end + if isa(val, SSAValue) && val.id <= length(compact.used_ssas) + # If `val.id` is greater than the length of `compact.result` or + # `compact.used_ssas`, this SSA value is in `new_new_nodes`, so + # don't count the use + compact.used_ssas[val.id] += 1 end + ur[] = val end return urs[] end From 9fd642058c65601119acf085cf1d472214d18553 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Sun, 2 Aug 2020 22:08:52 -0700 Subject: [PATCH 06/16] Fix small bug in #36684 PR #36684 changes `iterate(IncrementalCompact)` to return an extra index, but leaves its arguments unchanged. However, the PR decremented the index argument in a particular recursive call to `iterate`. This caused `iterate` not to recognise that it was done when `allow_cfg_transforms` was turned on. --- base/compiler/ssair/ir.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 7de77542cb72b..2ed5e6355e399 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -1206,7 +1206,7 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= # Move to next block compact.idx += 1 if finish_current_bb!(compact, active_bb, old_result_idx, true) - return iterate(compact, (compact.idx-1, active_bb + 1)) + return iterate(compact, (compact.idx, active_bb + 1)) else return Pair{Pair{Int, Int}, Any}(Pair{Int,Int}(compact.idx-1, old_result_idx), compact.result[old_result_idx][:inst]), (compact.idx, active_bb + 1) end From 524452b0d8bb58e5174b6ba84cc0f48a8e7e8570 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Tue, 15 Oct 2019 13:08:55 -0400 Subject: [PATCH 07/16] Implement algorithm for updating dominator tree with CFG edge insertions and deletions The DFS tree associated with a CFG now keeps track of postorder as well as preorder numbers. The DFS tree, as well as the state associated with the SNCA algorithm for finding (immediate) dominators is now stored in DomTree and reused for Dynamic SNCA. --- base/compiler/ssair/domtree.jl | 681 +++++++++++++++++++++------------ base/compiler/ssair/ir.jl | 16 + base/compiler/ssair/passes.jl | 2 +- test/compiler/ssair.jl | 84 +++- 4 files changed, 542 insertions(+), 241 deletions(-) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index f9b407f9ddb3e..66a16fc7e9a53 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -1,63 +1,482 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +# This file implements the Semi-NCA (SNCA) dominator tree construction +# described in Georgiadis' PhD thesis [LG05], which itself is a simplification +# of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches +# the algorithm choice in LLVM and seems to be a sweet spot in implementation +# simplicity and efficiency. +# +# This file also implements an extension of SNCA that supports updating the +# dominator tree with insertion and deletion of edges in the control flow +# graph, described in [GI16] as Dynamic SNCA. DSNCA was chosen over DBS, a +# different algorithm which achieves the best overall performance in [GI16], +# because it is simpler to understand and implement, performs well with edge +# deletions, and is of similar performance overall. +# +# SNCA works by first computing semidominators, then computing immediate +# dominators from them. The semidominator of a node is the node with minimum +# preorder number such that there is a semidominator path from it to the node. +# A semidominator path is a path in which the preorder numbers of all nodes not +# at the endpoints are greater than the preorder number of the last node. +# Intuitively, the semidominator approximates the immediate dominator of a node +# by taking the path (in the CFG) that gets as close to the root as possible +# while avoiding ancestors of the node in the DFS tree. +# +# In computing the semidominators, SNCA performs "path compression" whenever a +# node has a nontrivial semidominator (i.e. a semidominator that is not just +# its parent in the DFS tree). Path compression propagates the "label" of a +# node, which represents a possible semidominator with associated semidominator +# path passing through that node. +# +# For example, path compression will be performed for the following CFG, where +# the edge not in the DFS tree is marked with asterisks. Note that nodes are +# labeled with their preorder numbers, and all edges point downward. +# +# 1 +# |\ +# | \ +# | 4 +# | | +# 2 5 +# | | +# | 6 +# | * +# |* +# 3 +# +# There is a nontrivial semidominator path from 1 to 3, passing through 4, 5, +# and 6. Stepping through the whole algorithm on paper with an example like +# this is very helpful for understanding how it works. +# +# DSNCA runs the whole algorithm from scratch if the DFS tree is invalidated by +# the insertion or deletion, but otherwise recomputes a subset of the +# semidominators (all immediate dominators then need to be recomputed). +# +# [LG05] Linear-Time Algorithms for Dominators and Related Problems +# Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: +# ftp://ftp.cs.princeton.edu/reports/2005/737.pdf +# +# [LT79] A fast algorithm for finding dominators in a flowgraph +# Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1 +# http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf +# +# [GI16] An Experimental Study of Dynamic Dominators +# Loukas Georgiadis, Giuseppe F. Italiano, Luigi Laura, Federico +# Santaroni, April 2016 +# https://arxiv.org/abs/1604.02711 + +# We could make these real structs, but probably not worth the extra +# overhead. Still, give them names for documentary purposes. +const BBNumber = Int +const PreNumber = Int +const PostNumber = Int + +struct DFSTree + # These map between BB number and pre- or postorder numbers + to_pre::Vector{PreNumber} + from_pre::Vector{BBNumber} + to_post::Vector{PostNumber} + from_post::Vector{BBNumber} + + # Records parent relationships in the DFS tree + # (preorder number -> preorder number) + # Storing it this way saves a few lookups in the snca_compress! algorithm + to_parent_pre::Vector{PreNumber} +end + +function DFSTree(n_blocks::Int) + return DFSTree(zeros(PreNumber, n_blocks), + Vector{BBNumber}(undef, n_blocks), + zeros(PostNumber, n_blocks), + Vector{BBNumber}(undef, n_blocks), + zeros(PreNumber, n_blocks)) +end + +copy(D::DFSTree) = DFSTree(copy(D.to_pre), + copy(D.from_pre), + copy(D.to_post), + copy(D.from_post), + copy(D.to_parent_pre)) + +function copy!(dst::DFSTree, src::DFSTree) + copy!(dst.to_pre, src.to_pre) + copy!(dst.from_pre, src.from_pre) + copy!(dst.to_post, src.to_post) + copy!(dst.from_post, src.from_post) + copy!(dst.to_parent_pre, src.to_parent_pre) + return dst +end + +length(D::DFSTree) = length(D.from_pre) + +function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) + copy!(D, DFSTree(length(blocks))) + to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] + pre_num = 1 + post_num = 1 + while !isempty(to_visit) + # Because we want the postorder number as well as the preorder number, + # we don't pop the current node from the stack until we're moving up + # the tree + (current_node_bb, parent_pre, pushed_children) = to_visit[end] + + if pushed_children + # Going up the DFS tree, so all we need to do is record the + # postorder number, then move on + D.to_post[current_node_bb] = post_num + D.from_post[post_num] = current_node_bb + post_num += 1 + pop!(to_visit) + + elseif D.to_pre[current_node_bb] != 0 + # Node has already been visited, move on + pop!(to_visit) + continue + else + # Going down the DFS tree + + # Record preorder number + D.to_pre[current_node_bb] = pre_num + D.from_pre[pre_num] = current_node_bb + D.to_parent_pre[pre_num] = parent_pre + + # Record that children (will) have been pushed + to_visit[end] = (current_node_bb, parent_pre, true) + + # Push children to the stack + for succ_bb in cfg.blocks[current_node_bb].succs + push!(to_visit, (succ_bb, pre_num, false)) + end + + pre_num += 1 + end + end + + # If all blocks are reachable, this is a no-op, otherwise, we shrink these + # arrays. + resize!(D.from_pre, pre_num - 1) + resize!(D.from_post, post_num - 1) # should be same size as pre_num - 1 + resize!(D.to_parent_pre, pre_num - 1) + + return D +end + +DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks) + +""" +Keeps the per-BB state of the Semi NCA algorithm. In the original formulation, +there are three separate length `n` arrays, `label`, `semi` and `ancestor`. +Instead, for efficiency, we use one array in a array-of-structs style setup. +""" +struct SNCAData + semi::PreNumber + label::PreNumber +end + "Represents a Basic Block, in the DomTree" struct DomTreeNode # How deep we are in the DomTree level::Int # The BB indices in the CFG for all Basic Blocks we immediately dominate - children::Vector{Int} + children::Vector{BBNumber} end -DomTreeNode() = DomTreeNode(1, Vector{Int}()) + +DomTreeNode() = DomTreeNode(1, Vector{BBNumber}()) "Data structure that encodes which basic block dominates which." struct DomTree - # Which basic block immediately dominates each basic block (ordered by BB indices) - # Note: this is the inverse of the nodes, children field - idoms::Vector{Int} + # These can be reused when updating domtree dynamically + dfs_tree::DFSTree + snca_state::Vector{SNCAData} + + # Which basic block immediately dominates each basic block, using BB indices + idoms_bb::Vector{BBNumber} # The nodes in the tree (ordered by BB indices) nodes::Vector{DomTreeNode} end +function DomTree() + return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) +end + +function construct_domtree(cfg::CFG) + return update_domtree!(cfg, DomTree(), true, 0) +end + +function update_domtree!(cfg::CFG, domtree::DomTree, + recompute_dfs::Bool, max_pre::PreNumber) + if recompute_dfs + DFS!(domtree.dfs_tree, blocks) + end + + if max_pre == 0 + max_pre = length(domtree.dfs_tree) + end + + SNCA!(domtree, cfg, max_pre) + compute_domtree_nodes!(domtree) + return domtree +end + +function compute_domtree_nodes!(domtree::DomTree) + # Compute children + copy!(domtree.nodes, + DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)]) + for (idx, idom) in Iterators.enumerate(domtree.idoms_bb) + (idx == 1 || idom == 0) && continue + push!(domtree.nodes[idom].children, idx) + end + # Recursively set level + update_level!(domtree.nodes, 1, 1) + return domtree.nodes +end + +function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int) + worklist = Tuple{BBNumber, Int}[(node, level)] + while !isempty(worklist) + (node, level) = pop!(worklist) + nodes[node] = DomTreeNode(level, nodes[node].children) + foreach(nodes[node].children) do child + push!(worklist, (child, level+1)) + end + end +end + +""" +The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05]. Note that the +pseudocode in [LG05] is not entirely accurate. The best way to understand +what's happening is to read [LT79], then the description of SLT in [LG05] +(warning: inconsistent notation), then the description of Semi-NCA. +""" +function SNCA!(domtree::DomTree, cfg::CFG, max_pre::PreNumber) + D = domtree.dfs_tree + state = domtree.snca_state + # There may be more blocks than are reachable in the DFS / dominator tree + n_blocks = length(cfg.blocks) + n_nodes = length(D) + + # `label` is initialized to the identity mapping (though the paper doesn't + # make that clear). The rationale for this is Lemma 2.4 in [LG05] (i.e. + # Theorem 4 in [LT79]). Note however, that we don't ever look at `semi` + # until it is fully initialized, so we could leave it uninitialized here if + # we wanted to. + resize!(state, n_nodes) + for w in 1:max_pre + state[w] = SNCAData(typemax(PreNumber), w) + end + + # Calculate semidominators, but only for blocks with preorder number up to + # max_pre + ancestors = copy(D.to_parent_pre) + for w::PreNumber in reverse(2:max_pre) + # LLVM initializes this to the parent, the paper initializes this to + # `w`, but it doesn't really matter (the parent is a predecessor, so at + # worst we'll discover it below). Save a memory reference here. + semi_w = typemax(PreNumber) + last_linked = PreNumber(w + 1) + for v ∈ cfg.blocks[D.from_pre[w]].preds + # For the purpose of the domtree, ignore virtual predecessors into + # catch blocks. + v == 0 && continue + + v_pre = D.to_pre[v] + + # Ignore unreachable predecessors + v_pre == 0 && continue + + # N.B.: This conditional is missing from the pseudocode in figure + # 2.8 of [LG05]. It corresponds to the `ancestor[v] != 0` check in + # the `eval` implementation in figure 2.6 + if v_pre >= last_linked + # `v` has already been processed, so perform path compression + + # For performance, if the number of ancestors is small avoid + # the extra allocation of the worklist. + if length(ancestors) <= 32 + snca_compress!(state, ancestors, v_pre, last_linked) + else + snca_compress_worklist!(state, ancestors, v_pre, last_linked) + end + end + + # The (preorder number of the) semidominator of a block is the + # minimum over the labels of its predecessors + semi_w = min(semi_w, state[v_pre].label) + end + state[w] = SNCAData(semi_w, semi_w) + end + + # Compute immediate dominators, which for a node must be the nearest common + # ancestor in the (immediate) dominator tree between its semidominator and + # its parent (see Lemma 2.6 in [LG05]). + idoms_pre = copy(D.to_parent_pre) + for v in 2:n_nodes + idom = idoms_pre[v] + vsemi = state[v].semi + while idom > vsemi + idom = idoms_pre[idom] + end + idoms_pre[v] = idom + end + + # Express idoms in BB indexing + resize!(domtree.idoms_bb, n_blocks) + for i::BBNumber in 1:n_blocks + if i == 1 || D.to_pre[i] == 0 + domtree.idoms_bb[i] = 0 + else + domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]] + end + end +end + """ - Checks if bb1 dominates bb2. - bb1 and bb2 are indexes into the CFG blocks. - bb1 dominates bb2 if the only way to enter bb2 is via bb1. - (Other blocks may be in between, e.g bb1->bbX->bb2). +Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the +modification suggested in the paper to use `last_linked` to determine whether +an ancestor has been processed rather than storing `0` in the ancestor array. """ -function dominates(domtree::DomTree, bb1::Int, bb2::Int) +function snca_compress!(state::Vector{SNCAData}, ancestors::Vector{PreNumber}, + v::PreNumber, last_linked::PreNumber) + u = ancestors[v] + @assert u < v + if u >= last_linked + snca_compress!(state, ancestors, u, last_linked) + if state[u].label < state[v].label + state[v] = SNCAData(state[v].semi, state[u].label) + end + ancestors[v] = ancestors[u] + end + nothing +end + +function snca_compress_worklist!( + state::Vector{SNCAData}, ancestors::Vector{PreNumber}, + v::PreNumber, last_linked::PreNumber) + # TODO: There is a smarter way to do this + u = ancestors[v] + worklist = Tuple{PreNumber, PreNumber}[(u,v)] + @assert u < v + while !isempty(worklist) + u, v = last(worklist) + if u >= last_linked + if ancestors[u] >= last_linked + push!(worklist, (ancestors[u], u)) + continue + end + if state[u].label < state[v].label + state[v] = SNCAData(state[v].semi, state[u].label) + end + ancestors[v] = ancestors[u] + end + pop!(worklist) + end +end + +"Given an updated CFG, update the given dominator tree with an inserted edge." +function domtree_insert_edge!(domtree::DomTree, cfg::CFG, + from::BBNumber, to::BBNumber) + # Implements Section 3.1 of [GI16] + dt = domtree.dfs_tree + from_pre = dt.to_pre[from] + to_pre = dt.to_pre[to] + from_post = dt.to_post[from] + to_post = dt.to_post[to] + if to_pre == 0 || (from_pre < to_pre && from_post < to_post) + # The DFS tree is invalidated by the edge insertion, so run from + # scratch + update_domtree!(cfg, domtree, true, 0) + else + # DFS tree is still valid, so update only affected nodes + update_domtree!(cfg, domtree, false, to_pre) + end + + return domtree +end + +"Given an updated CFG, update the given dominator tree with a deleted edge." +function domtree_delete_edge!(domtree::DomTree, cfg::CFG, + from::BBNumber, to::BBNumber) + # Implements Section 3.1 of [GI16] + if is_parent(domtree.dfs_tree, from, to) + # The `from` block is the parent of the `to` block in the DFS tree, so + # deleting the edge invalidates the DFS tree, so start from scratch + update_domtree!(cfg, domtree, true, 0) + elseif on_semidominator_path(domtree, from, to) + # Recompute semidominators for blocks with preorder number up to that + # of `to` block. Semidominators for blocks with preorder number greater + # than that of `to` aren't affected because no semidominator path to + # the block can pass through the `to` block (the preorder number of + # `to` would be lower than those of these blocks, and `to` is not their + # parent in the DFS tree). + to_pre = domtree.dfs_tree.to_pre[to] + update_domtree!(cfg, domtree, false, to_pre) + end + # Otherwise, dominator tree is not affected + + return domtree +end + +"Check if x is the parent of y in the given DFS tree." +function is_parent(dfs_tree::DFSTree, x::BBNumber, y::BBNumber) + x_pre = dfs_tree.to_pre[x] + y_pre = dfs_tree.to_pre[y] + return x_pre == dfs_tree.to_parent_pre[y_pre] +end + +""" +Check if x is on some semidominator path from the semidominator of y to y, +assuming there is an edge from x to y. +""" +function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber) + x_pre = domtree.dfs_tree.to_pre[x] + y_pre = domtree.dfs_tree.to_pre[y] + + semi_y = domtree.snca_state[y_pre].semi + current_block = x_pre + + # Follow the semidominators of `x` up the DFS tree to see if we ever reach + # the semidominator of `y`. If so, `x` is on a semidominator path between + # `y` and its semidominator. We can stop if the preorder number of the + # semidominators becomes less than that of the semidominator of `y`, + # because it can only decrease further. + while current_block >= semi_y + if semi_y == current_block + return true + end + current_block = domtree.snca_state[current_block].semi + end + return false +end + +""" +Checks if bb1 dominates bb2. +bb1 and bb2 are indexes into the CFG blocks. +bb1 dominates bb2 if the only way to enter bb2 is via bb1. +(Other blocks may be in between, e.g bb1->bbX->bb2). +""" +function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) bb1 == bb2 && return true target_level = domtree.nodes[bb1].level source_level = domtree.nodes[bb2].level source_level < target_level && return false for _ in (source_level - 1):-1:target_level - bb2 = domtree.idoms[bb2] + bb2 = domtree.idoms_bb[bb2] end return bb1 == bb2 end -bb_unreachable(domtree::DomTree, bb::Int) = bb != 1 && domtree.nodes[bb].level == 1 - -function update_level!(domtree::Vector{DomTreeNode}, node::Int, level::Int) - worklist = Tuple{Int, Int}[(node, level)] - while !isempty(worklist) - (node, level) = pop!(worklist) - domtree[node] = DomTreeNode(level, domtree[node].children) - foreach(domtree[node].children) do child - push!(worklist, (child, level+1)) - end - end -end +bb_unreachable(domtree::DomTree, bb::BBNumber) = bb != 1 && domtree.dfs_tree.to_pre[bb] == 0 "Iterable data structure that walks though all dominated blocks" struct DominatedBlocks domtree::DomTree - worklist::Vector{Int} + worklist::Vector{BBNumber} end "Returns an iterator that walks through all blocks dominated by the basic block at index `root`" -function dominated(domtree::DomTree, root::Int) - doms = DominatedBlocks(domtree, Vector{Int}()) +function dominated(domtree::DomTree, root::BBNumber) + doms = DominatedBlocks(domtree, Vector{BBNumber}()) push!(doms.worklist, root) doms end @@ -115,213 +534,3 @@ function naive_idoms(cfg::CFG) end idoms end - -# Construct Dom Tree -function construct_domtree(cfg::CFG) - idoms = SNCA(cfg) - # Compute children - nblocks = length(cfg.blocks) - domtree = DomTreeNode[DomTreeNode() for _ = 1:nblocks] - for (idx, idom) in Iterators.enumerate(idoms) - (idx == 1 || idom == 0) && continue - push!(domtree[idom].children, idx) - end - # Recursively set level - update_level!(domtree, 1, 1) - DomTree(idoms, domtree) -end - -#================================ [SNCA] ======================================# -# -# This section implements the Semi-NCA (SNCA) dominator tree construction from -# described in Georgiadis' PhD thesis [LG05], which itself is a simplification -# of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches -# the algorithm choice in LLVM and seems to be a sweet spot in implementation -# simplicity and efficiency. -# -# [LG05] Linear-Time Algorithms for Dominators and Related Problems -# Loukas Georgiadis, Princeton University, November 2005, pp. 21-23: -# ftp://ftp.cs.princeton.edu/reports/2005/737.pdf -# -# [LT79] A fast algorithm for finding dominators in a flowgraph -# Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1 -# http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf -# -begin - # We could make these real structs, but probably not worth the extra - # overhead. Still, give them names for documentary purposes. - const BBNumber = UInt - const DFSNumber = UInt - - """ - Keeps the per-BB state of the Semi NCA algorithm. In the original - formulation, there are three separate length `n` arrays, `label`, `semi` and - `ancestor`. Instead, for efficiency, we use one array in a array-of-structs - style setup. - """ - struct Node - semi::DFSNumber - label::DFSNumber - end - - struct DFSTree - # Maps DFS number to BB number - numbering::Vector{BBNumber} - # Maps BB number to DFS number - reverse::Vector{DFSNumber} - # Records parent relationships in the DFS tree (DFS number -> DFS number) - # Storing it this way saves a few lookups in the snca_compress! algorithm - parents::Vector{DFSNumber} - end - length(D::DFSTree) = length(D.numbering) - preorder(D::DFSTree) = OneTo(length(D)) - _drop(xs::AbstractUnitRange, n::Integer) = (first(xs)+n):last(xs) - - function DFSTree(nblocks::Int) - DFSTree( - Vector{BBNumber}(undef, nblocks), - zeros(DFSNumber, nblocks), - Vector{DFSNumber}(undef, nblocks)) - end - - function DFS(cfg::CFG, current_node::BBNumber)::DFSTree - dfs = DFSTree(length(cfg.blocks)) - # TODO: We could reuse the storage in DFSTree for our worklist. We're - # guaranteed for the worklist to be smaller than the remaining space in - # DFSTree - worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)] - dfs_num = 1 - parent = 0 - while !isempty(worklist) - (parent, current_node) = pop!(worklist) - dfs.reverse[current_node] != 0 && continue - dfs.reverse[current_node] = dfs_num - dfs.numbering[dfs_num] = current_node - dfs.parents[dfs_num] = parent - for succ in cfg.blocks[current_node].succs - push!(worklist, (dfs_num, succ)) - end - dfs_num += 1 - end - # If all blocks are reachable, this is a no-op, otherwise, - # we shrink these arrays. - resize!(dfs.numbering, dfs_num - 1) - resize!(dfs.parents, dfs_num - 1) - dfs - end - - """ - Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the - modification suggested in the paper to use `last_linked` to determine - whether an ancestor has been processed rather than storing `0` in the - ancestor array. - """ - function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber}, - v::DFSNumber, last_linked::DFSNumber) - u = ancestors[v] - @assert u < v - if u >= last_linked - snca_compress!(state, ancestors, u, last_linked) - if state[u].label < state[v].label - state[v] = Node(state[v].semi, state[u].label) - end - ancestors[v] = ancestors[u] - end - nothing - end - - function snca_compress_worklist!( - state::Vector{Node}, ancestors::Vector{DFSNumber}, - v::DFSNumber, last_linked::DFSNumber) - # TODO: There is a smarter way to do this - u = ancestors[v] - worklist = Tuple{DFSNumber, DFSNumber}[(u,v)] - @assert u < v - while !isempty(worklist) - u, v = last(worklist) - if u >= last_linked - if ancestors[u] >= last_linked - push!(worklist, (ancestors[u], u)) - continue - end - if state[u].label < state[v].label - state[v] = Node(state[v].semi, state[u].label) - end - ancestors[v] = ancestors[u] - end - pop!(worklist) - end - end - - """ - SNCA(cfg::CFG) - - Determines a map from basic blocks to the block which immediately dominate them. - Expressed as indexes into `cfg.blocks`. - - The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05]. - Note that the pseudocode in [LG05] is not entirely accurate. - The best way to understand what's happening is to read [LT79], then the - description of SLT in [LG05] (warning: inconsistent notation), then - the description of Semi-NCA. - """ - function SNCA(cfg::CFG) - D = DFS(cfg, BBNumber(1)) - # `label` is initialized to the identity mapping (though - # the paper doesn't make that clear). The rational for this is Lemma - # 2.4 in [LG05] (i.e. Theorem 4 in ). Note however, that we don't - # ever look at `semi` until it is fully initialized, so we could leave - # it uninitialized here if we wanted to. - state = Node[ Node(typemax(DFSNumber), w) for w in preorder(D) ] - # Initialize idoms to parents. Note that while idoms are eventually - # BB indexed, we keep it DFS indexed until a final post-processing - # pass to avoid extra memory references during the O(N^2) phase below. - idoms_dfs = copy(D.parents) - # We abuse the parents array as the ancestors array. - # Semi-NCA does not look at the parents array at all. - # SLT would, but never simultaneously, so we could still - # do this. - ancestors = D.parents - for w::DFSNumber ∈ reverse(_drop(preorder(D), 1)) - # LLVM initializes this to the parent, the paper initializes this to - # `w`, but it doesn't really matter (the parent is a predecessor, - # so at worst we'll discover it below). Save a memory reference here. - semi_w = typemax(DFSNumber) - for v ∈ cfg.blocks[D.numbering[w]].preds - # For the purpose of the domtree, ignore virtual predecessors - # into catch blocks. - v == 0 && continue - vdfs = D.reverse[v] - # Ignore unreachable predecessors - vdfs == 0 && continue - last_linked = DFSNumber(w + 1) - # N.B.: This conditional is missing from the psuedocode - # in figure 2.8 of [LG05]. It corresponds to the - # `ancestor[v] != 0` check in the `eval` implementation in - # figure 2.6 - if vdfs >= last_linked - # For performance, if the number of ancestors is small - # avoid the extra allocation of the worklist. - if length(ancestors) <= 32 - snca_compress!(state, ancestors, vdfs, last_linked) - else - snca_compress_worklist!(state, ancestors, vdfs, last_linked) - end - end - semi_w = min(semi_w, state[vdfs].label) - end - state[w] = Node(semi_w, semi_w) - end - for v ∈ _drop(preorder(D), 1) - idom = idoms_dfs[v] - vsemi = state[v].semi - while idom > vsemi - idom = idoms_dfs[idom] - end - idoms_dfs[v] = idom - end - # Reexpress the idom relationship in BB indexing - idoms_bb = Int[ (i == 1 || D.reverse[i] == 0) ? 0 : D.numbering[idoms_dfs[D.reverse[i]]] for i = 1:length(cfg.blocks) ] - idoms_bb - end -end diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 2ed5e6355e399..8e61abf4fe000 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -36,6 +36,22 @@ struct CFG # TODO: make this O(1) instead of O(log(n_blocks))? end +function cfg_insert_edge!(cfg::CFG, from::Int, to::Int) + # Assumes that this edge does not already exist + push!(cfg.blocks[to].preds, from) + push!(cfg.blocks[from].succs, to) + nothing +end + +function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) + preds = cfg.blocks[to].preds + succs = cfg.blocks[from].succs + # Assumes that blocks appear at most once in preds and succs + deleteat!(preds, findfirst(x->x === from, preds)::Int) + deleteat!(succs, findfirst(x->x === to, succs)::Int) + nothing +end + function block_for_inst(index::Vector{Int}, inst::Int) return searchsortedfirst(index, inst, lt=(<=)) end diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index c53e8660d7887..cb315633c1ec6 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -55,7 +55,7 @@ function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int) # TODO: This can be much faster by looking at current level and only # searching for those blocks in a sorted order while !(curblock in allblocks) - curblock = domtree.idoms[curblock] + curblock = domtree.idoms_bb[curblock] end return curblock end diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index e61d4b0f8d5d1..644a988d9d5a3 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -67,10 +67,10 @@ let cfg = CFG(BasicBlock[ make_bb([2, 3] , [5] ), make_bb([2, 4] , [] ), ], Int[]) - dfs = Compiler.DFS(cfg, Compiler.BBNumber(1)) - @test dfs.numbering[dfs.parents[dfs.reverse[5]]] == 4 + dfs = Compiler.DFS(cfg) + @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 let correct_idoms = Compiler.naive_idoms(cfg) - @test Compiler.SNCA(cfg) == correct_idoms + @test Compiler.construct_domtree(cfg).idoms_bb == correct_idoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). @@ -81,7 +81,7 @@ let cfg = CFG(BasicBlock[ c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs)) d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs)) cfg′ = CFG(blocks, cfg.index) - @test Compiler.SNCA(cfg′) == correct_idoms + @test Compiler.construct_domtree(cfg′).idoms_bb == correct_idoms end end end @@ -233,3 +233,79 @@ let ci = make_ci([ ir = Core.Compiler.compact!(ir, true) @test Core.Compiler.verify_ir(ir) == nothing end + +# Test dynamic update of domtree with edge insertions and deletions in the +# following CFG: +# +# 1,1 +# | \ +# | \ +# | 3,4 < +# | | \ +# 2,2 4,5 | +# | | / +# | 6,6 / +# | / +# | / +# 5,3 +# +# Nodes indicate BB number, preorder number +# Edges point down, except the arrow that points up +let cfg = CFG(BasicBlock[ + make_bb([], [3, 2]), # the order of the successors is deliberate + make_bb([1], [5]), # and is to determine the preorder numbers + make_bb([1, 6], [4]), + make_bb([3], [6]), + make_bb([2, 6], []), + make_bb([4], [5, 3]), + ], Int[]) + domtree = Compiler.construct_domtree(cfg) + @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + + # Test removal of edge between a parent and child in the DFS tree, which + # should trigger complete recomputation of domtree (first case in algorithm + # for removing edge from domtree dynamically) + Compiler.cfg_delete_edge!(cfg, 2, 5) + Compiler.domtree_delete_edge!(domtree, cfg, 2, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 6, 4] + # Add edge back (testing first case for insertion) + Compiler.cfg_insert_edge!(cfg, 2, 5) + Compiler.domtree_insert_edge!(domtree, cfg, 2, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + + # Test second case in algorithm for removing edges from domtree, in which + # `from` is on a semidominator path from the semidominator of `to` to `to` + Compiler.cfg_delete_edge!(cfg, 6, 5) + Compiler.domtree_delete_edge!(domtree, cfg, 6, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 2, 4] + # Add edge back (testing second case for insertion) + Compiler.cfg_insert_edge!(cfg, 6, 5) + Compiler.domtree_insert_edge!(domtree, cfg, 6, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + + # Test last case for removing edges, in which edge does not satisfy either + # of the above conditions + Compiler.cfg_delete_edge!(cfg, 6, 3) + Compiler.domtree_delete_edge!(domtree, cfg, 6, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + # Add edge back (testing second case for insertion) + Compiler.cfg_insert_edge!(cfg, 6, 3) + Compiler.domtree_insert_edge!(domtree, cfg, 6, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + + # Try removing all edges from root + Compiler.cfg_delete_edge!(cfg, 1, 2) + Compiler.domtree_delete_edge!(domtree, cfg, 1, 2) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 1, 3, 6, 4] + Compiler.cfg_delete_edge!(cfg, 1, 3) + Compiler.domtree_delete_edge!(domtree, cfg, 1, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 0, 0, 0, 0] + # Add edges back + Compiler.cfg_insert_edge!(cfg, 1, 2) + Compiler.domtree_insert_edge!(domtree, cfg, 1, 2) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 0, 0, 2, 0] + Compiler.cfg_insert_edge!(cfg, 1, 3) + Compiler.domtree_insert_edge!(domtree, cfg, 1, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] +end From c73f19e640162f26a8dbbc3eb83276c110d129f6 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Mon, 28 Oct 2019 15:39:26 -0400 Subject: [PATCH 08/16] Remove dead blocks as determined by reachability instead of number of predecessors For now, just construct the domtree when we make an `IncrementalCompact` rather than try to update it (the domtree) incrementally. --- base/compiler/ssair/ir.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 8e61abf4fe000..54081db8d28e4 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -553,8 +553,9 @@ mutable struct IncrementalCompact if allow_cfg_transforms bb_rename = Vector{Int}(undef, length(blocks)) cur_bb = 1 + domtree = construct_domtree(blocks) for i = 1:length(bb_rename) - if i != 1 && length(blocks[i].preds) == 0 + if bb_unreachable(domtree, i) bb_rename[i] = -1 else bb_rename[i] = cur_bb @@ -1201,8 +1202,8 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= resize!(compact, old_result_idx) end bb = compact.ir.cfg.blocks[active_bb] - if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && length(bb.preds) == 0 - # No predecessors, kill the entire block. + if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && compact.bb_rename_succ[active_bb] == -1 + # Dead block, so kill the entire block. compact.idx = last(bb.stmts) # Pop any remaining insertion nodes while compact.new_nodes_idx <= length(compact.perm) From 12b34fe85411e6169bee33883a27a9d4f9ba7933 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Mon, 28 Oct 2019 15:30:39 -0400 Subject: [PATCH 09/16] Make domtree construction take `Vector{BasicBlock}` as input instead of CFG This is in anticipation of domtrees being added to CFGs. --- base/compiler/ssair/domtree.jl | 40 +++++++++++++------------- base/compiler/ssair/driver.jl | 2 +- base/compiler/ssair/passes.jl | 2 +- base/compiler/ssair/verify.jl | 2 +- test/compiler/irpasses.jl | 4 +-- test/compiler/ssair.jl | 52 +++++++++++++++++----------------- 6 files changed, 51 insertions(+), 51 deletions(-) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index 66a16fc7e9a53..c4781da584910 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -144,7 +144,7 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) to_visit[end] = (current_node_bb, parent_pre, true) # Push children to the stack - for succ_bb in cfg.blocks[current_node_bb].succs + for succ_bb in blocks[current_node_bb].succs push!(to_visit, (succ_bb, pre_num, false)) end @@ -200,11 +200,11 @@ function DomTree() return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) end -function construct_domtree(cfg::CFG) - return update_domtree!(cfg, DomTree(), true, 0) +function construct_domtree(blocks::Vector{BasicBlock}) + return update_domtree!(blocks, DomTree(), true, 0) end -function update_domtree!(cfg::CFG, domtree::DomTree, +function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, recompute_dfs::Bool, max_pre::PreNumber) if recompute_dfs DFS!(domtree.dfs_tree, blocks) @@ -214,7 +214,7 @@ function update_domtree!(cfg::CFG, domtree::DomTree, max_pre = length(domtree.dfs_tree) end - SNCA!(domtree, cfg, max_pre) + SNCA!(domtree, blocks, max_pre) compute_domtree_nodes!(domtree) return domtree end @@ -249,11 +249,11 @@ pseudocode in [LG05] is not entirely accurate. The best way to understand what's happening is to read [LT79], then the description of SLT in [LG05] (warning: inconsistent notation), then the description of Semi-NCA. """ -function SNCA!(domtree::DomTree, cfg::CFG, max_pre::PreNumber) +function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) D = domtree.dfs_tree state = domtree.snca_state # There may be more blocks than are reachable in the DFS / dominator tree - n_blocks = length(cfg.blocks) + n_blocks = length(blocks) n_nodes = length(D) # `label` is initialized to the identity mapping (though the paper doesn't @@ -275,7 +275,7 @@ function SNCA!(domtree::DomTree, cfg::CFG, max_pre::PreNumber) # worst we'll discover it below). Save a memory reference here. semi_w = typemax(PreNumber) last_linked = PreNumber(w + 1) - for v ∈ cfg.blocks[D.from_pre[w]].preds + for v ∈ blocks[D.from_pre[w]].preds # For the purpose of the domtree, ignore virtual predecessors into # catch blocks. v == 0 && continue @@ -373,8 +373,8 @@ function snca_compress_worklist!( end end -"Given an updated CFG, update the given dominator tree with an inserted edge." -function domtree_insert_edge!(domtree::DomTree, cfg::CFG, +"Given updated blocks, update the given dominator tree with an inserted edge." +function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, from::BBNumber, to::BBNumber) # Implements Section 3.1 of [GI16] dt = domtree.dfs_tree @@ -385,23 +385,23 @@ function domtree_insert_edge!(domtree::DomTree, cfg::CFG, if to_pre == 0 || (from_pre < to_pre && from_post < to_post) # The DFS tree is invalidated by the edge insertion, so run from # scratch - update_domtree!(cfg, domtree, true, 0) + update_domtree!(blocks, domtree, true, 0) else # DFS tree is still valid, so update only affected nodes - update_domtree!(cfg, domtree, false, to_pre) + update_domtree!(blocks, domtree, false, to_pre) end return domtree end -"Given an updated CFG, update the given dominator tree with a deleted edge." -function domtree_delete_edge!(domtree::DomTree, cfg::CFG, +"Given updated blocks, update the given dominator tree with a deleted edge." +function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, from::BBNumber, to::BBNumber) # Implements Section 3.1 of [GI16] if is_parent(domtree.dfs_tree, from, to) # The `from` block is the parent of the `to` block in the DFS tree, so # deleting the edge invalidates the DFS tree, so start from scratch - update_domtree!(cfg, domtree, true, 0) + update_domtree!(blocks, domtree, true, 0) elseif on_semidominator_path(domtree, from, to) # Recompute semidominators for blocks with preorder number up to that # of `to` block. Semidominators for blocks with preorder number greater @@ -410,7 +410,7 @@ function domtree_delete_edge!(domtree::DomTree, cfg::CFG, # `to` would be lower than those of these blocks, and `to` is not their # parent in the DFS tree). to_pre = domtree.dfs_tree.to_pre[to] - update_domtree!(cfg, domtree, false, to_pre) + update_domtree!(blocks, domtree, false, to_pre) end # Otherwise, dominator tree is not affected @@ -490,8 +490,8 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing) return (bb, nothing) end -function naive_idoms(cfg::CFG) - nblocks = length(cfg.blocks) +function naive_idoms(blocks::Vector{BasicBlock}) + nblocks = length(blocks) # The extra +1 helps us detect unreachable blocks below dom_all = BitSet(1:nblocks+1) dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks] @@ -499,10 +499,10 @@ function naive_idoms(cfg::CFG) while changed changed = false for n = 2:nblocks - if isempty(cfg.blocks[n].preds) + if isempty(blocks[n].preds) continue end - firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, cfg.blocks[n].preds)) + firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds)) new_doms = copy(dominators[firstp]) for p in rest intersect!(new_doms, dominators[p]) diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index cb8e9ce01d902..cd4215a97de08 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -111,7 +111,7 @@ end function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code - @timeit "domtree 1" domtree = construct_domtree(ir.cfg) + @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks) defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst) @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` return ir diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index cb315633c1ec6..107a6cf389b34 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -726,7 +726,7 @@ function getfield_elim_pass!(ir::IRCode) # IR. This needs to be after we iterate through the IR with # `IncrementalCompact` because removing dead blocks can invalidate the # domtree. - @timeit "domtree 2" domtree = construct_domtree(ir.cfg) + @timeit "domtree 2" domtree = construct_domtree(ir.cfg.blocks) # Now go through any mutable structs and see which ones we can eliminate for (idx, (intermediaries, defuse)) in defuses diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index 67c03184d4ad5..b169018517bd7 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -66,7 +66,7 @@ function verify_ir(ir::IRCode, print::Bool=true) # Verify CFG last_end = 0 # Verify statements - domtree = construct_domtree(ir.cfg) + domtree = construct_domtree(ir.cfg.blocks) for (idx, block) in pairs(ir.cfg.blocks) if first(block.stmts) != last_end + 1 #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index 34bd185eb2e73..c6190b720c5be 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -34,7 +34,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg) + domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) ir = Core.Compiler.domsort_ssa!(ir, domtree) Core.Compiler.verify_ir(ir) phi = ir.stmts.inst[3] @@ -62,7 +62,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg) + domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) ir = Core.Compiler.domsort_ssa!(ir, domtree) Core.Compiler.verify_ir(ir) end diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 644a988d9d5a3..79e8c06ce26e1 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -67,10 +67,10 @@ let cfg = CFG(BasicBlock[ make_bb([2, 3] , [5] ), make_bb([2, 4] , [] ), ], Int[]) - dfs = Compiler.DFS(cfg) + dfs = Compiler.DFS(cfg.blocks) @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 - let correct_idoms = Compiler.naive_idoms(cfg) - @test Compiler.construct_domtree(cfg).idoms_bb == correct_idoms + let correct_idoms = Compiler.naive_idoms(cfg.blocks) + @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). @@ -81,7 +81,7 @@ let cfg = CFG(BasicBlock[ c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs)) d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs)) cfg′ = CFG(blocks, cfg.index) - @test Compiler.construct_domtree(cfg′).idoms_bb == correct_idoms + @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms end end end @@ -259,53 +259,53 @@ let cfg = CFG(BasicBlock[ make_bb([2, 6], []), make_bb([4], [5, 3]), ], Int[]) - domtree = Compiler.construct_domtree(cfg) + domtree = Compiler.construct_domtree(cfg.blocks) @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test removal of edge between a parent and child in the DFS tree, which # should trigger complete recomputation of domtree (first case in algorithm # for removing edge from domtree dynamically) Compiler.cfg_delete_edge!(cfg, 2, 5) - Compiler.domtree_delete_edge!(domtree, cfg, 2, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 6, 4] + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4] # Add edge back (testing first case for insertion) Compiler.cfg_insert_edge!(cfg, 2, 5) - Compiler.domtree_insert_edge!(domtree, cfg, 2, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test second case in algorithm for removing edges from domtree, in which # `from` is on a semidominator path from the semidominator of `to` to `to` Compiler.cfg_delete_edge!(cfg, 6, 5) - Compiler.domtree_delete_edge!(domtree, cfg, 6, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 2, 4] + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4] # Add edge back (testing second case for insertion) Compiler.cfg_insert_edge!(cfg, 6, 5) - Compiler.domtree_insert_edge!(domtree, cfg, 6, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test last case for removing edges, in which edge does not satisfy either # of the above conditions Compiler.cfg_delete_edge!(cfg, 6, 3) - Compiler.domtree_delete_edge!(domtree, cfg, 6, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Add edge back (testing second case for insertion) Compiler.cfg_insert_edge!(cfg, 6, 3) - Compiler.domtree_insert_edge!(domtree, cfg, 6, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Try removing all edges from root Compiler.cfg_delete_edge!(cfg, 1, 2) - Compiler.domtree_delete_edge!(domtree, cfg, 1, 2) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 1, 3, 6, 4] + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4] Compiler.cfg_delete_edge!(cfg, 1, 3) - Compiler.domtree_delete_edge!(domtree, cfg, 1, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 0, 0, 0, 0] + Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0] # Add edges back Compiler.cfg_insert_edge!(cfg, 1, 2) - Compiler.domtree_insert_edge!(domtree, cfg, 1, 2) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 0, 0, 2, 0] + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0] Compiler.cfg_insert_edge!(cfg, 1, 3) - Compiler.domtree_insert_edge!(domtree, cfg, 1, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4] + Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3) + @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] end From 88534d5890505e15da3e77f36915c47efefafede Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Wed, 30 Oct 2019 18:02:53 -0400 Subject: [PATCH 10/16] Add a domtree to every CFG, avoiding constructing them explicitly when needed Every time a CFG is created, its corresponding dominator tree is as well. --- base/compiler/ssair/domtree.jl | 13 +++++++++--- base/compiler/ssair/driver.jl | 3 +-- base/compiler/ssair/ir.jl | 14 +++++++++++-- base/compiler/ssair/passes.jl | 27 +++++++++--------------- base/compiler/ssair/slot2ssa.jl | 22 ++++++++++---------- base/compiler/ssair/verify.jl | 25 +++++++++++++++------- test/compiler/irpasses.jl | 6 ++---- test/compiler/ssair.jl | 37 ++++++++++++--------------------- 8 files changed, 77 insertions(+), 70 deletions(-) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index c4781da584910..310e76a463666 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -173,6 +173,8 @@ struct SNCAData label::PreNumber end +copy(d::SNCAData) = SNCAData(d.semi, d.label) + "Represents a Basic Block, in the DomTree" struct DomTreeNode # How deep we are in the DomTree @@ -183,6 +185,8 @@ end DomTreeNode() = DomTreeNode(1, Vector{BBNumber}()) +copy(n::DomTreeNode) = DomTreeNode(n.level, copy(n.children)) + "Data structure that encodes which basic block dominates which." struct DomTree # These can be reused when updating domtree dynamically @@ -196,9 +200,12 @@ struct DomTree nodes::Vector{DomTreeNode} end -function DomTree() - return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) -end +DomTree() = DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) + +copy(d::DomTree) = DomTree(copy(d.dfs_tree), + copy(d.snca_state), + copy(d.idoms_bb), + copy(d.nodes)) function construct_domtree(blocks::Vector{BasicBlock}) return update_domtree!(blocks, DomTree(), true, 0) diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index cd4215a97de08..f5d74bd1effc3 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -111,9 +111,8 @@ end function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState) # need `ci` for the slot metadata, IR for the code - @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks) defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst) - @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` + @timeit "construct_ssa" ir = construct_ssa!(ci, ir, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir` return ir end diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 54081db8d28e4..fb9796aa868ab 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -34,12 +34,22 @@ struct CFG blocks::Vector{BasicBlock} index::Vector{Int} # map from instruction => basic-block number # TODO: make this O(1) instead of O(log(n_blocks))? + domtree end +function CFG(blocks::Vector{BasicBlock}, index::Vector{Int}) + return CFG(blocks, index, construct_domtree(blocks)) +end + +copy(c::CFG) = CFG(BasicBlock[copy(b) for b in c.blocks], + copy(c.index), + copy(c.domtree)) + function cfg_insert_edge!(cfg::CFG, from::Int, to::Int) # Assumes that this edge does not already exist push!(cfg.blocks[to].preds, from) push!(cfg.blocks[from].succs, to) + domtree_insert_edge!(cfg.domtree, cfg.blocks, from, to) nothing end @@ -49,6 +59,7 @@ function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) # Assumes that blocks appear at most once in preds and succs deleteat!(preds, findfirst(x->x === from, preds)::Int) deleteat!(succs, findfirst(x->x === to, succs)::Int) + domtree_delete_edge!(cfg.domtree, cfg.blocks, from, to) nothing end @@ -553,9 +564,8 @@ mutable struct IncrementalCompact if allow_cfg_transforms bb_rename = Vector{Int}(undef, length(blocks)) cur_bb = 1 - domtree = construct_domtree(blocks) for i = 1:length(bb_rename) - if bb_unreachable(domtree, i) + if bb_unreachable(code.cfg.domtree, i) bb_rename[i] = -1 else bb_rename[i] = cur_bb diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 107a6cf389b34..da675f60b0b3e 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -69,8 +69,8 @@ function val_for_def_expr(ir::IRCode, def::Int, fidx::Int) end end -function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) - curblock = find_curblock(domtree, allblocks, curblock) +function compute_value_for_block(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int) + curblock = find_curblock(ir.cfg.domtree, allblocks, curblock) def = 0 for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock @@ -80,10 +80,10 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector def == 0 ? phinodes[curblock] : val_for_def_expr(ir, def, fidx) end -function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int) +function compute_value_for_use(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int) # Find the first dominating def curblock = stmtblock = block_for_inst(ir.cfg, use_idx) - curblock = find_curblock(domtree, allblocks, curblock) + curblock = find_curblock(ir.cfg.domtree, allblocks, curblock) defblockdefs = Int[stmt for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock] def = 0 if !isempty(defblockdefs) @@ -105,7 +105,7 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I if !haskey(phinodes, curblock) # If this happens, we need to search the predecessors for defs. Which # one doesn't matter - if it did, we'd have had a phinode - return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) + return compute_value_for_block(ir, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds)) end # The use is the phinode return phinodes[curblock] @@ -721,13 +721,6 @@ function getfield_elim_pass!(ir::IRCode) used_ssas = copy(compact.used_ssas) simple_dce!(compact) ir = complete(compact) - - # Compute domtree, needed below, now that we have finished compacting the - # IR. This needs to be after we iterate through the IR with - # `IncrementalCompact` because removing dead blocks can invalidate the - # domtree. - @timeit "domtree 2" domtree = construct_domtree(ir.cfg.blocks) - # Now go through any mutable structs and see which ones we can eliminate for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) @@ -794,7 +787,7 @@ function getfield_elim_pass!(ir::IRCode) ldu = compute_live_ins(ir.cfg, du) phiblocks = Int[] if !isempty(ldu.live_in_bbs) - phiblocks = idf(ir.cfg, ldu, domtree) + phiblocks = idf(ir.cfg, ldu) end phinodes = IdDict{Int, SSAValue}() for b in phiblocks @@ -804,19 +797,19 @@ function getfield_elim_pass!(ir::IRCode) # Now go through all uses and rewrite them allblocks = sort(vcat(phiblocks, ldu.def_bbs)) for stmt in du.uses - ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt) + ir[SSAValue(stmt)] = compute_value_for_use(ir, allblocks, du, phinodes, fidx, stmt) end if !isbitstype(fieldtype(typ, fidx)) for (use, list) in preserve_uses - push!(list, compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use)) + push!(list, compute_value_for_use(ir, allblocks, du, phinodes, fidx, use)) end end for b in phiblocks for p in ir.cfg.blocks[b].preds n = ir[phinodes[b]] push!(n.edges, p) - push!(n.values, compute_value_for_block(ir, domtree, - allblocks, du, phinodes, fidx, p)) + push!(n.values, compute_value_for_block( + ir, allblocks, du, phinodes, fidx, p)) end end end diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index 5bea1c871610e..b0fad48c57ec7 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -271,10 +271,10 @@ end # subset of those backedges from a subtree rooted at `A` (out outside the subtree rooted # at `A`). Note however that this does not work the other way. Thus, the algorithm # needs to make sure that we always visit `B` before `A`. -function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) +function idf(cfg::CFG, liveness::BlockLiveness) # This should be a priority queue, but TODO - sorted array for now defs = liveness.def_bbs - pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)] + pq = Tuple{Int, Int}[(defs[i], cfg.domtree.nodes[defs[i]].level) for i in 1:length(defs)] sort!(pq, by=x->x[2]) phiblocks = Int[] # This bitset makes sure we only add a phi node to a given block once. @@ -298,7 +298,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) # since at this point we know that there is an edge from `node`'s # subtree to `succ`, we know that if succ's level is greater than # that of `node`, it must be dominated by `node`. - succ_level = domtree.nodes[succ].level + succ_level = cfg.domtree.nodes[succ].level succ_level > level && continue # We don't dominate succ. We need to place a phinode, # unless liveness said otherwise. @@ -319,7 +319,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree) end end # Recurse down the current subtree - for child in domtree.nodes[active].children + for child in cfg.domtree.nodes[active].children child in visited && continue push!(visited, child) push!(worklist, child) @@ -374,7 +374,7 @@ end RPO traversal and in particular, any use of an SSA value must come after (by linear order) its definition. """ -function domsort_ssa!(ir::IRCode, domtree::DomTree) +function domsort_ssa!(ir::IRCode) # First compute the new order of basic blocks result_order = Int[] stack = Int[] @@ -383,7 +383,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree) nnewfallthroughs = 0 while node !== -1 push!(result_order, node) - cs = domtree.nodes[node].children + cs = ir.cfg.domtree.nodes[node].children terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst] iscondbr = isa(terminator, GotoIfNot) let old_node = node + 1 @@ -579,8 +579,8 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode return new_typ end -function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int, sptypes::Vector{Any}, - slottypes::Vector{Any}) +function construct_ssa!(ci::CodeInfo, ir::IRCode, defuse, nargs::Int, + sptypes::Vector{Any}, slottypes::Vector{Any}) code = ir.stmts.inst cfg = ir.cfg left = Int[] @@ -597,7 +597,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg for (enter_block, exc) in catch_entry_blocks exc_handlers[enter_block+1] = (enter_block, exc) # TODO: Cut off here if the terminator is a leave corresponding to this enter - for block in dominated(domtree, enter_block+1) + for block in dominated(cfg.domtree, enter_block+1) exc_handlers[block] = (enter_block, exc) end end @@ -649,7 +649,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg end end end - phiblocks = idf(cfg, live, domtree) + phiblocks = idf(cfg, live) for block in phiblocks push!(phi_slots[block], idx) node = PhiNode() @@ -893,6 +893,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg local node = new_nodes.stmts[i] node[:inst] = new_to_regular(renumber_ssa!(node[:inst], ssavalmap), nstmts) end - @timeit "domsort" ir = domsort_ssa!(ir, domtree) + @timeit "domsort" ir = domsort_ssa!(ir) return ir end diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl index b169018517bd7..6f7069f406747 100644 --- a/base/compiler/ssair/verify.jl +++ b/base/compiler/ssair/verify.jl @@ -11,7 +11,7 @@ if !isdefined(@__MODULE__, Symbol("@verify_error")) end end -function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool) +function check_op(ir::IRCode, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool) if isa(op, SSAValue) if op.id > length(ir.stmts) def_bb = block_for_inst(ir.cfg, ir.new_nodes[op.id - length(ir.stmts)].pos) @@ -28,7 +28,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, end end else - if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb)) + if !dominates(ir.cfg.domtree, def_bb, use_bb) && !(bb_unreachable(ir.cfg.domtree, def_bb) && bb_unreachable(ir.cfg.domtree, use_bb)) # At the moment, we allow GC preserve tokens outside the standard domination notion #@Base.show ir @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))" @@ -61,12 +61,23 @@ function count_int(val::Int, arr::Vector{Int}) end function verify_ir(ir::IRCode, print::Bool=true) + # Verify domtree + if ir.cfg.domtree.idoms_bb != construct_domtree(ir.cfg.blocks).idoms_bb + # Dynamically updating the domtree made it incorrect? + @verify_error "Dominator tree does not match recomputed one" + error() + end + # Verify domtree by comparing with naive algorithm, but only if there + # aren't too many blocks for it to handle + if length(ir.cfg.blocks) < 1000 && ir.cfg.domtree.idoms_bb != naive_idoms(ir.cfg.blocks) + @verify_error "Dominator tree does not match immediate dominators computed by naive algorithm" + error() + end # For now require compact IR # @assert isempty(ir.new_nodes) # Verify CFG last_end = 0 # Verify statements - domtree = construct_domtree(ir.cfg.blocks) for (idx, block) in pairs(ir.cfg.blocks) if first(block.stmts) != last_end + 1 #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)] @@ -76,7 +87,7 @@ function verify_ir(ir::IRCode, print::Bool=true) last_end = last(block.stmts) terminator = ir.stmts[last_end][:inst] - bb_unreachable(domtree, idx) && continue + bb_unreachable(ir.cfg.domtree, idx) && continue for p in block.preds p == 0 && continue c = count_int(idx, ir.cfg.blocks[p].succs) @@ -144,7 +155,7 @@ function verify_ir(ir::IRCode, print::Bool=true) for (bb, idx) in bbidxiter(ir) # We allow invalid IR in dead code to avoid passes having to detect when # they're generating dead code. - bb_unreachable(domtree, bb) && continue + bb_unreachable(ir.cfg.domtree, bb) && continue stmt = ir.stmts[idx][:inst] stmt === nothing && continue if isa(stmt, PhiNode) @@ -174,7 +185,7 @@ function verify_ir(ir::IRCode, print::Bool=true) @verify_error "GlobalRefs and Exprs are not allowed as PhiNode values" error() end - check_op(ir, domtree, val, edge, last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print) + check_op(ir, val, edge, last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print) end elseif isa(stmt, PhiCNode) for i = 1:length(stmt.values) @@ -211,7 +222,7 @@ function verify_ir(ir::IRCode, print::Bool=true) end for op in userefs(stmt) op = op[] - check_op(ir, domtree, op, bb, idx, print) + check_op(ir, op, bb, idx, print) end end end diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl index c6190b720c5be..afed12c2d6fc1 100644 --- a/test/compiler/irpasses.jl +++ b/test/compiler/irpasses.jl @@ -34,8 +34,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) - ir = Core.Compiler.domsort_ssa!(ir, domtree) + ir = Core.Compiler.domsort_ssa!(ir) Core.Compiler.verify_ir(ir) phi = ir.stmts.inst[3] @test isa(phi, Core.PhiNode) && length(phi.edges) == 1 @@ -62,8 +61,7 @@ let m = Meta.@lower 1 + 1 src.ssaflags = fill(Int32(0), nstmts) ir = Core.Compiler.inflate_ir(src) Core.Compiler.verify_ir(ir) - domtree = Core.Compiler.construct_domtree(ir.cfg.blocks) - ir = Core.Compiler.domsort_ssa!(ir, domtree) + ir = Core.Compiler.domsort_ssa!(ir) Core.Compiler.verify_ir(ir) end diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 79e8c06ce26e1..7f15f49045e48 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -70,7 +70,7 @@ let cfg = CFG(BasicBlock[ dfs = Compiler.DFS(cfg.blocks) @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 let correct_idoms = Compiler.naive_idoms(cfg.blocks) - @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms + @test cfg.domtree.idoms_bb == correct_idoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). @@ -259,53 +259,42 @@ let cfg = CFG(BasicBlock[ make_bb([2, 6], []), make_bb([4], [5, 3]), ], Int[]) - domtree = Compiler.construct_domtree(cfg.blocks) - @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test removal of edge between a parent and child in the DFS tree, which # should trigger complete recomputation of domtree (first case in algorithm # for removing edge from domtree dynamically) Compiler.cfg_delete_edge!(cfg, 2, 5) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4] # Add edge back (testing first case for insertion) Compiler.cfg_insert_edge!(cfg, 2, 5) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test second case in algorithm for removing edges from domtree, in which # `from` is on a semidominator path from the semidominator of `to` to `to` Compiler.cfg_delete_edge!(cfg, 6, 5) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4] # Add edge back (testing second case for insertion) Compiler.cfg_insert_edge!(cfg, 6, 5) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Test last case for removing edges, in which edge does not satisfy either # of the above conditions Compiler.cfg_delete_edge!(cfg, 6, 3) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Add edge back (testing second case for insertion) Compiler.cfg_insert_edge!(cfg, 6, 3) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] # Try removing all edges from root Compiler.cfg_delete_edge!(cfg, 1, 2) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4] Compiler.cfg_delete_edge!(cfg, 1, 3) - Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0] # Add edges back Compiler.cfg_insert_edge!(cfg, 1, 2) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0] Compiler.cfg_insert_edge!(cfg, 1, 3) - Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3) - @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] + @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] end From 07f5c08258abaadea463f877a76fc8edd79b102f Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Mon, 18 Nov 2019 11:24:27 -0500 Subject: [PATCH 11/16] Update domtrees dynamically when CFGs get modified, and fix bug in dynamic domtree implementation --- base/compiler/ssair/domtree.jl | 91 ++++++++++++++++++++++ base/compiler/ssair/driver.jl | 1 - base/compiler/ssair/inlining.jl | 10 ++- base/compiler/ssair/ir.jl | 132 ++++++++++++++++++-------------- base/compiler/ssair/slot2ssa.jl | 13 +++- 5 files changed, 183 insertions(+), 64 deletions(-) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index 310e76a463666..610d852e02f95 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -270,9 +270,28 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # we wanted to. resize!(state, n_nodes) for w in 1:max_pre + # Only reset semidominators for nodes we want to recompute state[w] = SNCAData(typemax(PreNumber), w) end + # If we are only recomputing some of the semidominators, the remaining + # labels should be reset, because they may have become inapplicable to the + # node/semidominator we are currently processing/recomputing. They can + # become inapplicable because of path compressions that were triggered by + # nodes that should only be processed after the current one (but were + # processed the last time `SNCA!` was run). + # + # So, for every node that is not being reprocessed, we reset its label to + # its semidominator, which is the value that its label assumes once its + # semidominator is computed. If this was too conservative, i.e. if the + # label would have been updated before we process the current node in a + # situation where all semidominators were recomputed, then path compression + # will produce the correct label. + for w in max_pre+1:n_nodes + semi = state[w].semi + state[w] = SNCAData(semi, semi) + end + # Calculate semidominators, but only for blocks with preorder number up to # max_pre ancestors = copy(D.to_parent_pre) @@ -383,6 +402,11 @@ end "Given updated blocks, update the given dominator tree with an inserted edge." function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + # Implements Section 3.1 of [GI16] dt = domtree.dfs_tree from_pre = dt.to_pre[from] @@ -404,6 +428,11 @@ end "Given updated blocks, update the given dominator tree with a deleted edge." function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock}, from::BBNumber, to::BBNumber) + # `from` is unreachable, so `from` and `to` aren't in domtree + if bb_unreachable(domtree, from) + return domtree + end + # Implements Section 3.1 of [GI16] if is_parent(domtree.dfs_tree, from, to) # The `from` block is the parent of the `to` block in the DFS tree, so @@ -456,6 +485,68 @@ function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber) return false end +""" +Rename basic block numbers in a dominator tree, removing the block if it is +renamed to -1. +""" +function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber}) + # Rename DFS tree + rename_nodes!(domtree.dfs_tree, rename_bb) + + # `snca_state` is indexed by preorder number, so should be unchanged + + # Rename `idoms_bb` and `nodes` + old_idoms_bb = copy(domtree.idoms_bb) + old_nodes = copy(domtree.nodes) + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + domtree.idoms_bb[new_bb] = (new_bb == 1) ? + 0 : rename_bb[old_idoms_bb[old_bb]] + domtree.nodes[new_bb] = old_nodes[old_bb] + map!(i -> rename_bb[i], + domtree.nodes[new_bb].children, + domtree.nodes[new_bb].children) + end + end + + # length of `to_pre` after renaming DFS tree is new number of basic blocks + resize!(domtree.idoms_bb, length(domtree.dfs_tree.to_pre)) + resize!(domtree.nodes, length(domtree.dfs_tree.to_pre)) + return domtree +end + +""" +Rename basic block numbers in a DFS tree, removing the block if it is renamed +to -1. +""" +function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) + n_blocks = length(D.to_pre) + n_reachable_blocks = length(D.from_pre) + + old_to_pre = copy(D.to_pre) + old_from_pre = copy(D.from_pre) + old_to_post = copy(D.to_post) + old_from_post = copy(D.from_post) + max_new_bb = 0 + for (old_bb, new_bb) in enumerate(rename_bb) + if new_bb != -1 + D.to_pre[new_bb] = old_to_pre[old_bb] + D.from_pre[old_to_pre[old_bb]] = new_bb + D.to_post[new_bb] = old_to_post[old_bb] + D.from_post[old_to_post[old_bb]] = new_bb + + # Keep track of highest BB number to resize arrays with + if new_bb > max_new_bb + max_new_bb = new_bb + end + end + end + resize!(D.to_pre, max_new_bb) + resize!(D.to_post, max_new_bb) + # `to_parent_pre` should be unchanged + return D +end + """ Checks if bb1 dominates bb2. bb1 and bb2 are indexes into the CFG blocks. diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index f5d74bd1effc3..5a1bfcb28a530 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -121,7 +121,6 @@ function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState) ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, nargs, sv) ir = slot2reg(ir, ci, nargs, sv) #@Base.show ("after_construct", ir) - # TODO: Domsorting can produce an updated domtree - no need to recompute here @timeit "compact 1" ir = compact!(ir) @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv) #@timeit "verify 2" verify_ir(ir) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index 926491223795c..4b9b7f5fd41e4 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -298,6 +298,7 @@ function finish_cfg_inline!(state::CFGInliningState) end end +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::InliningTodo, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -415,6 +416,7 @@ end const fatal_type_bound_error = ErrorException("fatal error in type inference (type bound)") +# NOTE: The domtree is not kept up-to-date with changes this makes function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, linetable::Vector{LineInfoNode}, item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) @@ -497,7 +499,9 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int, end function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool) - # Compute the new CFG first (modulo statement ranges, which will be computed below) + # Compute the new CFG first (modulo statement ranges, which will be + # computed below, and the domtree, which will be updated below before we + # iterate through the statements) state = CFGInliningState(ir) for item in todo if isa(item, UnionSplit) @@ -518,6 +522,9 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo let compact = IncrementalCompact(ir, false) compact.result_bbs = state.new_cfg_blocks + # Recompute the domtree now that the CFG has been modified + compact.result_domtree = construct_domtree(compact.result_bbs) + # This needs to be a minimum and is more of a size hint nn = 0 for item in todo @@ -572,7 +579,6 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo compact[idx] = PhiNode(Any[edge == length(state.bb_rename) ? length(state.new_cfg_blocks) : state.bb_rename[edge+1]-1 for edge in stmt.edges], stmt.values) end end - ir = finish(compact) end return ir diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index fb9796aa868ab..f13958f22afdb 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -34,7 +34,7 @@ struct CFG blocks::Vector{BasicBlock} index::Vector{Int} # map from instruction => basic-block number # TODO: make this O(1) instead of O(log(n_blocks))? - domtree + domtree # TODO type end function CFG(blocks::Vector{BasicBlock}, index::Vector{Int}) @@ -54,11 +54,7 @@ function cfg_insert_edge!(cfg::CFG, from::Int, to::Int) end function cfg_delete_edge!(cfg::CFG, from::Int, to::Int) - preds = cfg.blocks[to].preds - succs = cfg.blocks[from].succs - # Assumes that blocks appear at most once in preds and succs - deleteat!(preds, findfirst(x->x === from, preds)::Int) - deleteat!(succs, findfirst(x->x === to, succs)::Int) + kill_edge!(cfg.blocks, from, to) domtree_delete_edge!(cfg.domtree, cfg.blocks, from, to) nothing end @@ -529,6 +525,7 @@ mutable struct IncrementalCompact ir::IRCode result::InstructionStream result_bbs::Vector{BasicBlock} + result_domtree # TODO type ssa_rename::Vector{Any} bb_rename_pred::Vector{Int} @@ -591,18 +588,22 @@ mutable struct IncrementalCompact let blocks = blocks result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] end + result_domtree = copy(code.cfg.domtree) + rename_nodes!(result_domtree, bb_rename) else bb_rename = Vector{Int}() result_bbs = code.cfg.blocks + result_domtree = code.cfg.domtree end ssa_rename = Any[SSAValue(i) for i = 1:new_len] late_fixup = Vector{Int}() new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms) + return new(code, result, result_bbs, result_domtree, ssa_rename, + bb_rename, bb_rename, used_ssas, late_fixup, perm, 1, + new_new_nodes, pending_nodes, pending_perm, 1, 1, 1, false, + allow_cfg_transforms, allow_cfg_transforms) end # For inlining @@ -616,11 +617,11 @@ mutable struct IncrementalCompact new_new_nodes = NewNodeStream() pending_nodes = NewNodeStream() pending_perm = Int[] - return new(code, parent.result, - parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas, - late_fixup, perm, 1, - new_new_nodes, pending_nodes, pending_perm, - 1, result_offset, parent.active_result_bb, false, false, false) + return new(code, parent.result, parent.result_bbs, + parent.result_domtree, ssa_rename, bb_rename, bb_rename, + parent.used_ssas, late_fixup, perm, 1, new_new_nodes, + pending_nodes, pending_perm, 1, result_offset, + parent.active_result_bb, false, false, false) end end @@ -892,63 +893,74 @@ function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int) preds, succs = bbs[to].preds, bbs[from].succs deleteat!(preds, findfirst(x->x === from, preds)::Int) deleteat!(succs, findfirst(x->x === to, succs)::Int) - if length(preds) == 0 - for succ in copy(bbs[to].succs) - kill_edge!(bbs, to, succ) - end - end + nothing end # N.B.: from and to are non-renamed indices function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int) - # Note: We recursively kill as many edges as are obviously dead. However, this - # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or - # worstcase during codegen). - preds = compact.result_bbs[compact.bb_rename_succ[to]].preds - succs = compact.result_bbs[compact.bb_rename_pred[from]].succs - deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int) - deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int) - # Check if the block is now dead - if length(preds) == 0 - for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs) - kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred)) + renamed_from = compact.bb_rename_pred[from] + renamed_to = compact.bb_rename_succ[to] + + preds = compact.result_bbs[renamed_to].preds + succs = compact.result_bbs[renamed_from].succs + + deleteat!(preds, findfirst(x->x === renamed_from, preds)::Int) + deleteat!(succs, findfirst(x->x === renamed_to, succs)::Int) + + domtree_delete_edge!(compact.result_domtree, + compact.result_bbs, + renamed_from, + renamed_to) + + # Recursively kill edges to dead blocks. This is not necessary for removing + # dead blocks with a subsequent `IncrementalCompact`, but killing all the + # statements in dead blocks allows passes not to have to worry about + # whether statements have become invalid by becoming dead. + if bb_unreachable(compact.result_domtree, renamed_to) + for succ in copy(compact.result_bbs[renamed_to].succs) + # Make sure edge hasn't already been killed in a previous iteration + # of this loop + if succ in compact.result_bbs[renamed_to].succs + kill_edge!(compact, active_bb, to, + findfirst(x->x === succ, compact.bb_rename_pred)) + end end if to < active_bb # Kill all statements in the block - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts + stmts = compact.result_bbs[renamed_to].stmts for stmt in stmts compact.result[stmt][:inst] = nothing end compact.result[last(stmts)][:inst] = ReturnNode() end - else - # Remove this edge from all phi nodes in `to` block - # NOTE: It is possible for `to` to contain only `nothing` statements, - # so we must be careful to stop at its last statement - if to < active_bb - stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts - idx = first(stmts) - while idx <= last(stmts) - stmt = compact.result[idx][:inst] - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x === compact.bb_rename_pred[from], stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end - idx += 1 + + # TODO: kill statements in blocks past `active_bb` + end + + # Remove this edge from any phi nodes + if to < active_bb + stmts = compact.result_bbs[renamed_to].stmts + idx = first(stmts) + while idx <= last(stmts) + stmt = compact.result[idx][:inst] + stmt === nothing && continue + isa(stmt, PhiNode) || break + i = findfirst(x-> x === renamed_from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) end - else - stmts = compact.ir.cfg.blocks[to].stmts - for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) - stmt === nothing && continue - isa(stmt, PhiNode) || break - i = findfirst(x-> x === from, stmt.edges) - if i !== nothing - deleteat!(stmt.edges, i) - deleteat!(stmt.values, i) - end + idx += 1 + end + else + stmts = compact.ir.cfg.blocks[to].stmts + for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) + stmt === nothing && continue + isa(stmt, PhiNode) || break + i = findfirst(x-> x === from, stmt.edges) + if i !== nothing + deleteat!(stmt.edges, i) + deleteat!(stmt.values, i) end end end @@ -1401,7 +1413,9 @@ end function complete(compact::IncrementalCompact) result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1) - cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)]) + cfg = CFG(result_bbs, + Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)], + compact.result_domtree) return IRCode(compact.ir, compact.result, cfg, compact.new_new_nodes) end diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl index b0fad48c57ec7..9276238e640e5 100644 --- a/base/compiler/ssair/slot2ssa.jl +++ b/base/compiler/ssair/slot2ssa.jl @@ -385,7 +385,6 @@ function domsort_ssa!(ir::IRCode) push!(result_order, node) cs = ir.cfg.domtree.nodes[node].children terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst] - iscondbr = isa(terminator, GotoIfNot) let old_node = node + 1 if length(cs) >= 1 # Adding the nodes in reverse sorted order attempts to retain @@ -499,7 +498,17 @@ function domsort_ssa!(ir::IRCode) for i in 1:length(result) result[i][:inst] = renumber_ssa!(result[i][:inst], inst_rename, true) end - cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]]) + # Recompute domtree from scratch because adding the new nodes for retaining + # fallthrough can be a significant change to the CFG, and recomputing the + # whole thing is probably faster than making many updates. If not for these + # new nodes, we could use + # + # domtree = rename_blocks!( + # ir.cfg.domtree, + # Int[haskey(bb_rename, old_bb) ? bb_rename[old_bb] : -1 + # for old_bb in 1:length(ir.cfg.blocks)]) + domtree = construct_domtree(new_bbs) + cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]], domtree) new_new_nodes = NewNodeStream(length(ir.new_nodes)) for i = 1:length(ir.new_nodes) new_info = ir.new_nodes.info[i] From e2e00b0f3258e1e0ad4ffaa98d8caf99a37a6d31 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Mon, 25 Nov 2019 13:25:08 -0500 Subject: [PATCH 12/16] Update domtree dynamically in `cfg_simplify` --- base/compiler/ssair/domtree.jl | 91 ++++++++++++++++++++++++++++++++-- base/compiler/ssair/ir.jl | 6 +-- base/compiler/ssair/passes.jl | 19 ++++--- test/compiler/ssair.jl | 14 +++--- 4 files changed, 112 insertions(+), 18 deletions(-) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index 610d852e02f95..86e72fba7982a 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -489,9 +489,9 @@ end Rename basic block numbers in a dominator tree, removing the block if it is renamed to -1. """ -function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber}) +function rename_blocks!(domtree::DomTree, rename_bb::Vector{BBNumber}) # Rename DFS tree - rename_nodes!(domtree.dfs_tree, rename_bb) + rename_blocks!(domtree.dfs_tree, rename_bb) # `snca_state` is indexed by preorder number, so should be unchanged @@ -519,7 +519,7 @@ end Rename basic block numbers in a DFS tree, removing the block if it is renamed to -1. """ -function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) +function rename_blocks!(D::DFSTree, rename_bb::Vector{BBNumber}) n_blocks = length(D.to_pre) n_reachable_blocks = length(D.from_pre) @@ -547,6 +547,91 @@ function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber}) return D end +""" +Combine two blocks `from` and `to`. It is assumed that there is an edge from +`from` to `to`, and that the sole successor of `from` is `to` and the sole +predecessor of `to` is `from`. This does not remove the `to` node, which is +then considered to have no edges. +""" +function combine_blocks!(domtree::DomTree, from::BBNumber, to::BBNumber) + orig_pre_of_to = domtree.dfs_tree.to_pre[to] + + # Combine nodes in DFS tree + combine_blocks!(domtree.dfs_tree, from, to) + + # Update `snca_state` + deleteat!(domtree.snca_state, orig_pre_of_to) + for pre in 1:length(domtree.snca_state) + # If semidominator was `to`, set it to `from` instead + if domtree.snca_state[pre].semi == orig_pre_of_to + label = domtree.snca_state[pre].label + domtree.snca_state[pre] = SNCAData(orig_pre_of_to - 1, label) + end + # Labels will be reset anyway, so no need to update them + end + + # Update `idoms_bb` + for bb in 1:length(domtree.idoms_bb) + # If immediate dominator was `to`, set it to `from` instead + if domtree.idoms_bb[bb] == to + domtree.idoms_bb[bb] = from + end + end + + # Update `nodes` + copy!(domtree.nodes[from].children, domtree.nodes[to].children) + domtree.nodes[to] = DomTreeNode() + # Recursively decrement level of the affected children + worklist = domtree.nodes[from].children + while !isempty(worklist) + node = pop!(worklist) + domtree.nodes[node] = DomTreeNode(domtree.nodes[node].level-1, + domtree.nodes[node].children) + foreach(child -> push!(worklist, child), + domtree.nodes[node].children) + end + + return domtree +end + +function combine_blocks!(D::DFSTree, from::BBNumber, to::BBNumber) + # Reassignment of preorder and postorder numbers depend on these + # conditions, which should hold if `from` has only one successor `to` and + # `to` has only one predecessor `from`. + @assert D.to_pre[from] + 1 == D.to_pre[to] + @assert D.to_post[from] == D.to_post[to] + 1 + + # Set preorder number of `to` to 0, + # decrement preorder numbers greater than original preorder number of `to`. + # Set postorder number of `to` to 0, + # decrement postorder numbers greater than original preorder number of `to`. + + orig_pre_of_to = D.to_pre[to] + orig_post_of_to = D.to_post[to] + + D.to_pre[to] = 0 + D.to_post[to] = 0 + for bb in 1:length(D.to_pre) + if D.to_pre[bb] > orig_pre_of_to + D.to_pre[bb] -= 1 + end + if D.to_post[bb] > orig_post_of_to + D.to_post[bb] -= 1 + end + end + deleteat!(D.from_pre, orig_pre_of_to) + deleteat!(D.from_post, orig_post_of_to) + + deleteat!(D.to_parent_pre, orig_pre_of_to) + for pre in 1:length(D.to_parent_pre) + if D.to_parent_pre[pre] > orig_pre_of_to + D.to_parent_pre[pre] -= 1 + end + end + + return D +end + """ Checks if bb1 dominates bb2. bb1 and bb2 are indexes into the CFG blocks. diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index f13958f22afdb..a01fddddd02f9 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -586,10 +586,10 @@ mutable struct IncrementalCompact end end let blocks = blocks - result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1] + result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) + if bb_rename[i] != -1] end - result_domtree = copy(code.cfg.domtree) - rename_nodes!(result_domtree, bb_rename) + result_domtree = rename_blocks!(copy(code.cfg.domtree), bb_rename) else bb_rename = Vector{Int}() result_bbs = code.cfg.blocks diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index da675f60b0b3e..a780e5f8f9271 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -1032,6 +1032,9 @@ end function cfg_simplify!(ir::IRCode) bbs = ir.cfg.blocks + old_domtree = ir.cfg.domtree + new_domtree = copy(old_domtree) + merge_into = zeros(Int, length(bbs)) merged_succ = zeros(Int, length(bbs)) function follow_merge_into(idx::Int) @@ -1055,6 +1058,13 @@ function cfg_simplify!(ir::IRCode) # Prevent cycles by making sure we don't end up back at `idx` # by following what is to be merged into `succ` if follow_merged_succ(succ) != idx + # Update domtree, by combining the block that `idx` is + # going to be merged into with the block that `succ` was + # going to be merged into (before this merge) + combine_blocks!(new_domtree, + follow_merge_into(idx), + follow_merge_into(succ)) + merge_into[succ] = idx merged_succ[idx] = succ end @@ -1066,12 +1076,8 @@ function cfg_simplify!(ir::IRCode) max_bb_num = 1 bb_rename_succ = zeros(Int, length(bbs)) for i = 1:length(bbs) - # Drop blocks that will be merged away - if merge_into[i] != 0 - bb_rename_succ[i] = -1 - end - # Drop blocks with no predecessors - if i != 1 && length(ir.cfg.blocks[i].preds) == 0 + # Drop blocks that will be merged away or are unreachable + if merge_into[i] != 0 || bb_unreachable(old_domtree, i) bb_rename_succ[i] = -1 end @@ -1152,6 +1158,7 @@ function cfg_simplify!(ir::IRCode) compact.bb_rename_succ = bb_rename_succ compact.bb_rename_pred = bb_rename_pred compact.result_bbs = cresult_bbs + compact.result_domtree = rename_blocks!(new_domtree, bb_rename_succ) result_idx = 1 for (idx, orig_bb) in enumerate(result_bbs) ms = orig_bb diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 7f15f49045e48..cf381cdefae47 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -222,12 +222,14 @@ end # nodes let ci = make_ci([ # Block 1 - Core.Compiler.GotoNode(3), - # Block 2 (no predecessors) - Core.Compiler.ReturnNode(3), - # Block 3 - Core.PhiNode(Any[1, 2], Any[100, 200]), - Core.Compiler.ReturnNode(Core.SSAValue(3)) + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 4), + # Block 2 + Core.Compiler.GotoNode(4), + # Block 3 (no predecessors) + Core.Compiler.GotoNode(4), + # Block 4 + Core.PhiNode(Any[1, 2, 3], Any[100, 200, 300]), + Core.Compiler.ReturnNode(Core.SSAValue(4)) ]) ir = Core.Compiler.inflate_ir(ci) ir = Core.Compiler.compact!(ir, true) From 17a69997bccf7d91bd2cafe4d7edb17caf72b041 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Sun, 8 Dec 2019 01:23:42 -0500 Subject: [PATCH 13/16] Kill statements in blocks that are unreachable, add tests for killing edges This change only affects statements that we have yet to encounter after killing an edge, while iterating through `IncrementalCompact`. Statements in dead blocks that come before the point at which the edge is killed are killed in `kill_edge!`, when the edge is killed. --- base/compiler/ssair/ir.jl | 30 ++++++++++++++++----- test/compiler/ssair.jl | 56 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index a01fddddd02f9..65748a591cbb8 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -3,6 +3,8 @@ @inline isexpr(@nospecialize(stmt), head::Symbol) = isa(stmt, Expr) && stmt.head === head Core.PhiNode() = Core.PhiNode(Any[], Any[]) +isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) + """ Like UnitRange{Int}, but can handle the `last` field, being temporarily < first (this can happen during compacting) @@ -925,16 +927,21 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: findfirst(x->x === succ, compact.bb_rename_pred)) end end - if to < active_bb - # Kill all statements in the block + + # Kill all statements in dead successor if it has already been + # processed. Also kill statements if dead successor is the active BB, + # in order to kill statements that have already been processed, + # although statements in the current BB that have not been processed + # will be killed in `process_node!`. Statements in dead blocks that + # have not yet been processed (`to` > `active_bb`) will be killed in + # `process_node!`. + if to <= active_bb stmts = compact.result_bbs[renamed_to].stmts for stmt in stmts compact.result[stmt][:inst] = nothing end compact.result[last(stmts)][:inst] = ReturnNode() end - - # TODO: kill statements in blocks past `active_bb` end # Remove this edge from any phi nodes @@ -974,7 +981,19 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr late_fixup = compact.late_fixup used_ssas = compact.used_ssas ssa_rename[idx] = SSAValue(result_idx) - if stmt === nothing + if compact.cfg_transforms_enabled && + # Don't kill statements in blocks renamed to -1 because + # `cfg_simplify!` renames blocks that have been merged into others + # to -1, but the statements in them are not dead. Unreachable + # blocks detected in the constructor of `IncrementalCompact` are + # removed then, so we never encounter statements from them here. + compact.bb_rename_succ[active_bb] != -1 && + bb_unreachable(compact.result_domtree, + compact.bb_rename_succ[active_bb]) + # Kill statements in dead blocks + result[result_idx][:inst] = isterminator(stmt) ? ReturnNode() : nothing + result_idx += 1 + elseif stmt === nothing ssa_rename[idx] = stmt elseif isa(stmt, OldSSAValue) ssa_rename[idx] = ssa_rename[stmt.id] @@ -1272,7 +1291,6 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}= # result_idx is not, incremented, but that's ok and expected compact.result[old_result_idx] = compact.ir.stmts[idx] result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx, active_bb, true) - stmt_if_any = old_result_idx == result_idx ? nothing : compact.result[old_result_idx][:inst] compact.result_idx = result_idx if idx == last(bb.stmts) && !attach_after_stmt_after(compact, idx) finish_current_bb!(compact, active_bb, old_result_idx) diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index cf381cdefae47..e04f4c89ca727 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -300,3 +300,59 @@ let cfg = CFG(BasicBlock[ Compiler.cfg_insert_edge!(cfg, 1, 3) @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4] end + +# Make sure killing edges while iterating through an `IncrementalCompact` kills +# all the statements in the dead blocks too. +# +# Block that becomes unreachable is before the active BB +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoNode(4), + # Block 2 + Core.Compiler.PhiNode(Any[2, 4], Any[100, 200]), + Core.Compiler.GotoNode(2), + # Block 3 + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 2), + # Block 4 + Core.Compiler.ReturnNode(0) + ]) + ir = Core.Compiler.inflate_ir(ci) + compact = Core.Compiler.IncrementalCompact(ir, true) + Core.Compiler.foreach(x -> begin + # Delete edge to block 2 when at statement 4 + if x.first == 4 + Core.Compiler.kill_edge!( + compact, compact.active_result_bb, 3, 2) + compact.result[4] = Core.Compiler.GotoNode(4) + end + end, compact) + ir = Core.Compiler.finish(compact) + @test Core.Compiler.verify_ir(ir) === nothing + @test ir.stmts[2][:inst] === nothing +end +# Block that becomes unreachable is after the active BB +let ci = make_ci([ + # Block 1 + Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 3), + # Block 2 + Core.Compiler.GotoNode(5), + # Block 3 + Core.Compiler.PhiNode(Any[2, 3], Any[100, 200]), + Core.Compiler.GotoNode(3), + # Block 4 + Core.Compiler.ReturnNode(0) + ]) + ir = Core.Compiler.inflate_ir(ci) + compact = Core.Compiler.IncrementalCompact(ir, true) + Core.Compiler.foreach(x -> begin + # Delete edge to block 3 when at statement 1 + if x.first == 1 + Core.Compiler.kill_edge!( + compact, compact.active_result_bb, 1, 3) + compact.result[1] = Core.Compiler.GotoNode(2) + end + end, compact) + ir = Core.Compiler.finish(compact) + @test Core.Compiler.verify_ir(ir) === nothing + @test ir.stmts[3][:inst] === nothing +end From 0b3c63c5903f21e438e219d34285c3b7b793b836 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Mon, 25 Nov 2019 15:12:23 -0500 Subject: [PATCH 14/16] Fix infinite loop bug in `kill_edge!` If a statement was `nothing`, `kill_edge!` would never move on from trying to kill it because the index wasn't incremented. --- base/compiler/ssair/ir.jl | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 65748a591cbb8..e2a6d7611bcc1 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -530,6 +530,9 @@ mutable struct IncrementalCompact result_domtree # TODO type ssa_rename::Vector{Any} + # `bb_rename_pred` is for renaming predecessors to the blocks they have + # been merged into (differs from `bb_rename_succ` only if blocks are + # combined), `bb_rename_succ` is how blocks are actually renamed bb_rename_pred::Vector{Int} bb_rename_succ::Vector{Int} @@ -906,8 +909,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: preds = compact.result_bbs[renamed_to].preds succs = compact.result_bbs[renamed_from].succs - deleteat!(preds, findfirst(x->x === renamed_from, preds)::Int) - deleteat!(succs, findfirst(x->x === renamed_to, succs)::Int) + deleteat!(preds, findfirst(x -> x === renamed_from, preds)::Int) + deleteat!(succs, findfirst(x -> x === renamed_to, succs)::Int) domtree_delete_edge!(compact.result_domtree, compact.result_bbs, @@ -924,7 +927,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: # of this loop if succ in compact.result_bbs[renamed_to].succs kill_edge!(compact, active_bb, to, - findfirst(x->x === succ, compact.bb_rename_pred)) + findfirst(x -> x === succ, compact.bb_rename_pred)) end end @@ -950,9 +953,12 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: idx = first(stmts) while idx <= last(stmts) stmt = compact.result[idx][:inst] - stmt === nothing && continue + if stmt === nothing + idx += 1 + continue + end isa(stmt, PhiNode) || break - i = findfirst(x-> x === renamed_from, stmt.edges) + i = findfirst(x -> x === renamed_from, stmt.edges) if i !== nothing deleteat!(stmt.edges, i) deleteat!(stmt.values, i) @@ -964,7 +970,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to:: for stmt in CompactPeekIterator(compact, first(stmts), last(stmts)) stmt === nothing && continue isa(stmt, PhiNode) || break - i = findfirst(x-> x === from, stmt.edges) + i = findfirst(x -> x === from, stmt.edges) if i !== nothing deleteat!(stmt.edges, i) deleteat!(stmt.values, i) From ff20764ba164aceec6e248ea4d50188ab9895445 Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Sat, 14 Dec 2019 22:36:57 -0500 Subject: [PATCH 15/16] Move `BasicBlock` to separate source file This is so we can add type declarations to fields in ir.jl that are domtrees, by breaking the dependency loop between domtree.jl (uses basic blocks but defines domtrees) and ir.jl (uses domtrees but defined basic blocks). --- base/compiler/ssair/basicblock.jl | 32 +++++++++++++++++++++++++++++++ base/compiler/ssair/driver.jl | 3 ++- base/compiler/ssair/ir.jl | 31 ++---------------------------- 3 files changed, 36 insertions(+), 30 deletions(-) create mode 100644 base/compiler/ssair/basicblock.jl diff --git a/base/compiler/ssair/basicblock.jl b/base/compiler/ssair/basicblock.jl new file mode 100644 index 0000000000000..427aae707e664 --- /dev/null +++ b/base/compiler/ssair/basicblock.jl @@ -0,0 +1,32 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +""" +Like UnitRange{Int}, but can handle the `last` field, being temporarily +< first (this can happen during compacting) +""" +struct StmtRange <: AbstractUnitRange{Int} + start::Int + stop::Int +end + +first(r::StmtRange) = r.start +last(r::StmtRange) = r.stop +iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1) + +StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range)) + +struct BasicBlock + stmts::StmtRange + preds::Vector{Int} + succs::Vector{Int} +end + +function BasicBlock(stmts::StmtRange) + return BasicBlock(stmts, Int[], Int[]) +end + +function BasicBlock(old_bb, stmts) + return BasicBlock(stmts, old_bb.preds, old_bb.succs) +end + +copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs)) diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl index 5a1bfcb28a530..1f6470fed9046 100644 --- a/base/compiler/ssair/driver.jl +++ b/base/compiler/ssair/driver.jl @@ -10,8 +10,9 @@ else end end -include("compiler/ssair/ir.jl") +include("compiler/ssair/basicblock.jl") include("compiler/ssair/domtree.jl") +include("compiler/ssair/ir.jl") include("compiler/ssair/slot2ssa.jl") include("compiler/ssair/queries.jl") include("compiler/ssair/passes.jl") diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index e2a6d7611bcc1..d34a854f8ed0c 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -5,38 +5,11 @@ Core.PhiNode() = Core.PhiNode(Any[], Any[]) isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode) -""" -Like UnitRange{Int}, but can handle the `last` field, being temporarily -< first (this can happen during compacting) -""" -struct StmtRange <: AbstractUnitRange{Int} - start::Int - stop::Int -end -first(r::StmtRange) = r.start -last(r::StmtRange) = r.stop -iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1) - -StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range)) - -struct BasicBlock - stmts::StmtRange - preds::Vector{Int} - succs::Vector{Int} -end -function BasicBlock(stmts::StmtRange) - return BasicBlock(stmts, Int[], Int[]) -end -function BasicBlock(old_bb, stmts) - return BasicBlock(stmts, old_bb.preds, old_bb.succs) -end -copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs)) - struct CFG blocks::Vector{BasicBlock} index::Vector{Int} # map from instruction => basic-block number # TODO: make this O(1) instead of O(log(n_blocks))? - domtree # TODO type + domtree::DomTree end function CFG(blocks::Vector{BasicBlock}, index::Vector{Int}) @@ -527,7 +500,7 @@ mutable struct IncrementalCompact ir::IRCode result::InstructionStream result_bbs::Vector{BasicBlock} - result_domtree # TODO type + result_domtree::DomTree ssa_rename::Vector{Any} # `bb_rename_pred` is for renaming predecessors to the blocks they have From 6a250c4249de25784bda9223192f4ec8bc897a7b Mon Sep 17 00:00:00 2001 From: Leon Shen Date: Thu, 31 Oct 2019 12:21:14 -0400 Subject: [PATCH 16/16] Enable CFG transforms; turn DCE back on globally --- base/compiler/ssair/ir.jl | 4 ++-- test/compiler/inline.jl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index d34a854f8ed0c..4688a14a6fb8c 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -527,7 +527,7 @@ mutable struct IncrementalCompact cfg_transforms_enabled::Bool fold_constant_branches::Bool - function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false) + function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=true) # Sort by position with attach after nodes after regular ones perm = my_sortperm(Int[let new_node = code.new_nodes.info[i] (new_node.pos * 2 + Int(new_node.attach_after)) @@ -1416,7 +1416,7 @@ function complete(compact::IncrementalCompact) return IRCode(compact.ir, compact.result, cfg, compact.new_new_nodes) end -function compact!(code::IRCode, allow_cfg_transforms::Bool=false) +function compact!(code::IRCode, allow_cfg_transforms::Bool=true) compact = IncrementalCompact(code, allow_cfg_transforms) # Just run through the iterator without any processing for _ in compact; end # _ isa Pair{Int, Any} diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 85ac8fd6a0f4c..334bafc5c8685 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -161,7 +161,7 @@ function f_ifelse(x) return b ? x + 1 : x end # 2 for now because the compiler leaves a GotoNode around -@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 +@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2 # Test that inlining of _apply properly hits the inference cache @noinline cprop_inline_foo1() = (1, 1)