From bb6e299c6e2db9e8a0aaad024826c93e171c868f Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Mon, 14 Oct 2019 20:49:52 -0400
Subject: [PATCH 01/16] Decline to remove phi nodes when this might cause
 forward references

Phi nodes are optimized away when there is only one predecessor, but this can
cause problems in dead loops because forward references can be created, leading
to issues with optimization passes that look at all code, dead or not. This
fixes issue #29107 when DCE is turned on.
---
 base/compiler/ssair/ir.jl | 17 +++++++++++++++--
 test/compiler/ssair.jl    | 40 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 4 deletions(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 621072527a334..95cdcc15b788b 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -500,9 +500,11 @@ mutable struct IncrementalCompact
     ir::IRCode
     result::InstructionStream
     result_bbs::Vector{BasicBlock}
+
     ssa_rename::Vector{Any}
     bb_rename_pred::Vector{Int}
     bb_rename_succ::Vector{Int}
+
     used_ssas::Vector{Int}
     late_fixup::Vector{Int}
     perm::Vector{Int}
@@ -512,6 +514,7 @@ mutable struct IncrementalCompact
     # TODO: Switch these two to a min-heap of some sort
     pending_nodes::NewNodeStream  # New nodes that were after the compaction point at insertion time
     pending_perm::Vector{Int}
+
     # State
     idx::Int
     result_idx::Int
@@ -519,6 +522,7 @@ mutable struct IncrementalCompact
     renamed_new_nodes::Bool
     cfg_transforms_enabled::Bool
     fold_constant_branches::Bool
+
     function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false)
         # Sort by position with attach after nodes after regular ones
         perm = my_sortperm(Int[let new_node = code.new_nodes.info[i]
@@ -871,7 +875,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
     # Note: We recursively kill as many edges as are obviously dead. However, this
     # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or
     # worstcase during codegen).
-    preds, succs = compact.result_bbs[compact.bb_rename_succ[to]].preds, compact.result_bbs[compact.bb_rename_pred[from]].succs
+    preds = compact.result_bbs[compact.bb_rename_succ[to]].preds
+    succs = compact.result_bbs[compact.bb_rename_pred[from]].succs
     deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int)
     deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int)
     # Check if the block is now dead
@@ -985,7 +990,15 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         result_idx += 1
     elseif isa(stmt, PhiNode)
         values = process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa)
-        if length(stmt.edges) == 1 && isassigned(values, 1) &&
+        # Don't remove the phi node if it is before the definition of its value
+        # because doing so can create forward references. This should only
+        # happen with dead loops, but can cause problems when optimization
+        # passes look at all code, dead or not. This check should be
+        # unnecessary when DCE can remove those dead loops entirely, so this is
+        # just to be safe.
+        before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) &&
+            idx < values[1].id
+        if length(stmt.edges) == 1 && isassigned(values, 1) && !before_def &&
                 length(compact.cfg_transforms_enabled ?
                     compact.result_bbs[compact.bb_rename_succ[active_bb]].preds :
                     compact.ir.cfg.blocks[active_bb].preds) == 1
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 3f7b9b02a728f..657d7a4224aae 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -1,5 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using Base.Meta
 using Core.IR
 const Compiler = Core.Compiler
 
@@ -135,7 +136,6 @@ end
 @test f32579(0, false) === false
 
 # Test for bug caused by renaming blocks improperly, related to PR #32145
-using Base.Meta
 let ci = (Meta.@lower 1 + 1).args[1]
     ci.code = [
         # block 1
@@ -166,7 +166,6 @@ let ci = (Meta.@lower 1 + 1).args[1]
 end
 
 # Test that GlobalRef in value position is non-canonical
-using Base.Meta
 let ci = (Meta.@lower 1 + 1).args[1]
     ci.code = [
         Expr(:call, GlobalRef(Main, :something_not_defined_please))
@@ -180,3 +179,40 @@ let ci = (Meta.@lower 1 + 1).args[1]
     ir = Core.Compiler.compact!(ir, true)
     @test_throws ErrorException Core.Compiler.verify_ir(ir, false)
 end
+
+# Issue #29107
+let ci = (Meta.@lower 1 + 1).args[1]
+    ci.code = [
+        # Block 1
+        Core.Compiler.GotoNode(6),
+        # Block 2
+        # The following phi node gets deleted because it only has one edge, so
+        # the call to `something` is made to use the value of `something2()`,
+        # even though this value is defined after it. We don't want this to
+        # happen even though this block is dead because subsequent optimization
+        # passes may look at all code, dead or not.
+        Core.PhiNode(Any[2], Any[Core.SSAValue(4)]),
+        Expr(:call, :something, Core.SSAValue(2)),
+        Expr(:call, :something2),
+        Core.Compiler.GotoNode(2),
+        # Block 3
+        Core.Compiler.ReturnNode(1000)
+    ]
+    nstmts = length(ci.code)
+    ci.ssavaluetypes = nstmts
+    ci.codelocs = fill(Int32(1), nstmts)
+    ci.ssaflags = fill(Int32(0), nstmts)
+    ir = Core.Compiler.inflate_ir(ci)
+    ir = Core.Compiler.compact!(ir, true)
+    # Make sure that if there is a call to `something` (block 2 should be
+    # removed entirely with working DCE), it doesn't use any SSA values that
+    # come after it.
+    for i in 1:length(ir.stmts)
+        s = ir.stmts[i]
+        if isa(s, Expr) && s.head == :call && s.args[1] == :something
+            if isa(s.args[2], SSAValue)
+                @test s.args[2].id <= i
+            end
+        end
+    end
+end

From 714e28c1fceaf53ed64270e04532f566ae8d4700 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Tue, 29 Oct 2019 16:08:08 -0400
Subject: [PATCH 02/16] Remove edges from dead blocks from phi nodes

---
 base/compiler/ssair/driver.jl |  3 +--
 base/compiler/ssair/ir.jl     | 39 +++++++++++++++++++++++++---
 base/compiler/ssair/passes.jl |  9 ++++++-
 test/compiler/irpasses.jl     |  3 +--
 test/compiler/ssair.jl        | 49 +++++++++++++++++++++++------------
 5 files changed, 79 insertions(+), 24 deletions(-)

diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index 0ff6943aa5408..cb8e9ce01d902 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -128,8 +128,7 @@ function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState)
     #@timeit "verify 2" verify_ir(ir)
     ir = compact!(ir)
     #@Base.show ("before_sroa", ir)
-    @timeit "domtree 2" domtree = construct_domtree(ir.cfg)
-    @timeit "SROA" ir = getfield_elim_pass!(ir, domtree)
+    @timeit "SROA" ir = getfield_elim_pass!(ir)
     #@Base.show ir.new_nodes
     #@Base.show ("after_sroa", ir)
     ir = adce_pass!(ir)
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 95cdcc15b788b..9fb4ff6745188 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -989,7 +989,41 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa)
         result_idx += 1
     elseif isa(stmt, PhiNode)
-        values = process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa)
+        if compact.cfg_transforms_enabled
+            # Rename phi node edges
+            map!(i -> compact.bb_rename_pred[i], stmt.edges, stmt.edges)
+
+            # Remove edges and values associated with dead blocks. Entries in
+            # `values` can be undefined when the phi node refers to something
+            # that is not defined. (This is not a sign that the compiler is
+            # unintentionally leaving some entries in `values` uninitialized.)
+            # For example, consider a reference to a variable that is only
+            # defined if some branch is taken.
+            #
+            # In order to leave undefined values undefined (undefined-ness is
+            # not a value we can copy), we copy only the edges and (defined)
+            # values we want to keep to new arrays initialized with undefined
+            # elements.
+            edges = Vector{Any}(undef, length(stmt.edges))
+            values = Vector{Any}(undef, length(stmt.values))
+            new_index = 1
+            for old_index in 1:length(stmt.edges)
+                if stmt.edges[old_index] != -1
+                    edges[new_index] = stmt.edges[old_index]
+                    if isassigned(stmt.values, old_index)
+                        values[new_index] = stmt.values[old_index]
+                    end
+                    new_index += 1
+                end
+            end
+            resize!(edges, new_index-1)
+            resize!(values, new_index-1)
+        else
+            edges = stmt.edges
+            values = stmt.values
+        end
+
+        values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa)
         # Don't remove the phi node if it is before the definition of its value
         # because doing so can create forward references. This should only
         # happen with dead loops, but can cause problems when optimization
@@ -998,14 +1032,13 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         # just to be safe.
         before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) &&
             idx < values[1].id
-        if length(stmt.edges) == 1 && isassigned(values, 1) && !before_def &&
+        if length(edges) == 1 && isassigned(values, 1) && !before_def &&
                 length(compact.cfg_transforms_enabled ?
                     compact.result_bbs[compact.bb_rename_succ[active_bb]].preds :
                     compact.ir.cfg.blocks[active_bb].preds) == 1
             # There's only one predecessor left - just replace it
             ssa_rename[idx] = values[1]
         else
-            edges = compact.cfg_transforms_enabled ? map!(i->compact.bb_rename_pred[i], stmt.edges, stmt.edges) : stmt.edges
             result[result_idx][:inst] = PhiNode(edges, values)
             result_idx += 1
         end
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index a2dc6d6e75f60..8e4cda9220b1c 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -524,7 +524,7 @@ function perform_lifting!(compact::IncrementalCompact,
 end
 
 assertion_counter = 0
-function getfield_elim_pass!(ir::IRCode, domtree::DomTree)
+function getfield_elim_pass!(ir::IRCode)
     compact = IncrementalCompact(ir)
     insertions = Vector{Any}()
     defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}()
@@ -721,6 +721,13 @@ function getfield_elim_pass!(ir::IRCode, domtree::DomTree)
     used_ssas = copy(compact.used_ssas)
     simple_dce!(compact)
     ir = complete(compact)
+
+    # Compute domtree, needed below, now that we have finished compacting the
+    # IR. This needs to be after we iterate through the IR with
+    # `IncrementalCompact` because removing dead blocks can invalidate the
+    # domtree.
+    @timeit "domtree 2" domtree = construct_domtree(ir.cfg)
+
     # Now go through any mutable structs and see which ones we can eliminate
     for (idx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index be7539cac4d9f..e4b9d970e9293 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -180,8 +180,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any])
     @test Core.Compiler.verify_ir(ir) === nothing
-    domtree = Core.Compiler.construct_domtree(ir.cfg)
-    ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir, domtree)
+    ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir)
     @test Core.Compiler.verify_ir(ir) === nothing
 end
 
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 657d7a4224aae..e61d4b0f8d5d1 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -3,6 +3,19 @@
 using Base.Meta
 using Core.IR
 const Compiler = Core.Compiler
+using .Compiler: CFG, BasicBlock
+
+make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs)
+
+function make_ci(code)
+    ci = (Meta.@lower 1 + 1).args[1]
+    ci.code = code
+    nstmts = length(ci.code)
+    ci.ssavaluetypes = nstmts
+    ci.codelocs = fill(Int32(1), nstmts)
+    ci.ssaflags = fill(Int32(0), nstmts)
+    return ci
+end
 
 # TODO: this test is broken
 #let code = Any[
@@ -29,7 +42,6 @@ const Compiler = Core.Compiler
 #end
 
 # Issue #31121
-using .Compiler: CFG, BasicBlock
 
 # We have the following CFG and corresponding DFS numbering:
 #
@@ -48,7 +60,6 @@ using .Compiler: CFG, BasicBlock
 # than `3`, so the idom search missed that `1` is `3`'s semi-dominator). Here
 # we manually construct that CFG and verify that the DFS records the correct
 # parent.
-make_bb(preds, succs) = BasicBlock(Compiler.StmtRange(0, 0), preds, succs)
 let cfg = CFG(BasicBlock[
     make_bb([]     , [2, 3]),
     make_bb([1]    , [4, 5]),
@@ -136,8 +147,7 @@ end
 @test f32579(0, false) === false
 
 # Test for bug caused by renaming blocks improperly, related to PR #32145
-let ci = (Meta.@lower 1 + 1).args[1]
-    ci.code = [
+let ci = make_ci([
         # block 1
         Core.Compiler.GotoIfNot(Expr(:boundscheck), 6),
         # block 2
@@ -155,11 +165,7 @@ let ci = (Meta.@lower 1 + 1).args[1]
         Core.Compiler.ReturnNode(Core.SSAValue(8)),
         # block 6
         Core.Compiler.ReturnNode(Core.SSAValue(8))
-    ]
-    nstmts = length(ci.code)
-    ci.ssavaluetypes = nstmts
-    ci.codelocs = fill(Int32(1), nstmts)
-    ci.ssaflags = fill(Int32(0), nstmts)
+    ])
     ir = Core.Compiler.inflate_ir(ci)
     ir = Core.Compiler.compact!(ir, true)
     @test Core.Compiler.verify_ir(ir) == nothing
@@ -181,8 +187,7 @@ let ci = (Meta.@lower 1 + 1).args[1]
 end
 
 # Issue #29107
-let ci = (Meta.@lower 1 + 1).args[1]
-    ci.code = [
+let ci = make_ci([
         # Block 1
         Core.Compiler.GotoNode(6),
         # Block 2
@@ -197,11 +202,7 @@ let ci = (Meta.@lower 1 + 1).args[1]
         Core.Compiler.GotoNode(2),
         # Block 3
         Core.Compiler.ReturnNode(1000)
-    ]
-    nstmts = length(ci.code)
-    ci.ssavaluetypes = nstmts
-    ci.codelocs = fill(Int32(1), nstmts)
-    ci.ssaflags = fill(Int32(0), nstmts)
+    ])
     ir = Core.Compiler.inflate_ir(ci)
     ir = Core.Compiler.compact!(ir, true)
     # Make sure that if there is a call to `something` (block 2 should be
@@ -216,3 +217,19 @@ let ci = (Meta.@lower 1 + 1).args[1]
         end
     end
 end
+
+# Make sure dead blocks that are removed are not still referenced in live phi
+# nodes
+let ci = make_ci([
+        # Block 1
+        Core.Compiler.GotoNode(3),
+        # Block 2 (no predecessors)
+        Core.Compiler.ReturnNode(3),
+        # Block 3
+        Core.PhiNode(Any[1, 2], Any[100, 200]),
+        Core.Compiler.ReturnNode(Core.SSAValue(3))
+    ])
+    ir = Core.Compiler.inflate_ir(ci)
+    ir = Core.Compiler.compact!(ir, true)
+    @test Core.Compiler.verify_ir(ir) == nothing
+end

From 1c9d4988a2396910573a05b436846ee2398e3a34 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Mon, 25 Nov 2019 13:25:08 -0500
Subject: [PATCH 03/16] Fix possible merge cycle bug in `cfg_simplify!`

---
 base/compiler/ssair/passes.jl | 101 +++++++++++++++++++---------------
 test/compiler/irpasses.jl     |  24 ++++++++
 2 files changed, 81 insertions(+), 44 deletions(-)

diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 8e4cda9220b1c..c53e8660d7887 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1041,30 +1041,49 @@ function cfg_simplify!(ir::IRCode)
     bbs = ir.cfg.blocks
     merge_into = zeros(Int, length(bbs))
     merged_succ = zeros(Int, length(bbs))
+    function follow_merge_into(idx::Int)
+        while merge_into[idx] != 0
+            idx = merge_into[idx]
+        end
+        return idx
+    end
+    function follow_merged_succ(idx::Int)
+        while merged_succ[idx] != 0
+            idx = merged_succ[idx]
+        end
+        return idx
+    end
 
-    # Walk the CFG at from the entry block and aggressively combine blocks
+    # Walk the CFG from the entry block and aggressively combine blocks
     for (idx, bb) in enumerate(bbs)
         if length(bb.succs) == 1
             succ = bb.succs[1]
             if length(bbs[succ].preds) == 1
-                merge_into[succ] = idx
-                merged_succ[idx] = succ
+                # Prevent cycles by making sure we don't end up back at `idx`
+                # by following what is to be merged into `succ`
+                if follow_merged_succ(succ) != idx
+                    merge_into[succ] = idx
+                    merged_succ[idx] = succ
+                end
             end
         end
     end
+
+    # Assign new BB numbers
     max_bb_num = 1
     bb_rename_succ = zeros(Int, length(bbs))
-    # Lay out the basic blocks
     for i = 1:length(bbs)
+        # Drop blocks that will be merged away
         if merge_into[i] != 0
             bb_rename_succ[i] = -1
-            continue
         end
-        # Drop unreachable blocks
+        # Drop blocks with no predecessors
         if i != 1 && length(ir.cfg.blocks[i].preds) == 0
             bb_rename_succ[i] = -1
         end
+
         bb_rename_succ[i] != 0 && continue
+
         curr = i
         while true
             bb_rename_succ[curr] = max_bb_num
@@ -1072,9 +1091,7 @@ function cfg_simplify!(ir::IRCode)
             # Now walk the chain of blocks we merged.
             # If we end in something that may fall through,
             # we have to schedule that block next
-            while merged_succ[curr] != 0
-                curr = merged_succ[curr]
-            end
+            curr = follow_merged_succ(curr)
             terminator = ir.stmts[ir.cfg.blocks[curr].stmts[end]][:inst]
             if isa(terminator, GotoNode) || isa(terminator, ReturnNode)
                 break
@@ -1082,19 +1099,24 @@ function cfg_simplify!(ir::IRCode)
             curr += 1
         end
     end
+
+    # Figure out how predecessors should be renamed
     bb_rename_pred = zeros(Int, length(bbs))
     for i = 1:length(bbs)
         if merged_succ[i] != 0
+            # Block `i` should no longer be a predecessor (before renaming)
+            # because it is being merged with its sole successor
             bb_rename_pred[i] = -1
             continue
         end
-        bbnum = i
-        while merge_into[bbnum] != 0
-            bbnum = merge_into[bbnum]
-        end
+        bbnum = follow_merge_into(i)
         bb_rename_pred[i] = bb_rename_succ[bbnum]
     end
+
+    # Compute map from new to old blocks
     result_bbs = Int[findfirst(j->i==j, bb_rename_succ) for i = 1:max_bb_num-1]
+
+    # Compute new block lengths
     result_bbs_lengths = zeros(Int, max_bb_num-1)
     for (idx, orig_bb) in enumerate(result_bbs)
         ms = orig_bb
@@ -1103,41 +1125,32 @@ function cfg_simplify!(ir::IRCode)
             ms = merged_succ[ms]
         end
     end
+
+    # Compute statement indices the new blocks start at
     bb_starts = Vector{Int}(undef, 1+length(result_bbs_lengths))
     bb_starts[1] = 1
     for i = 1:length(result_bbs_lengths)
         bb_starts[i+1] = bb_starts[i] + result_bbs_lengths[i]
     end
-    # Figure out the pred and succ lists for each basic block in our merged result
-    cresult_bbs = let result_bbs = result_bbs,
-                      merged_succ = merged_succ,
-                      merge_into = merge_into,
-                      bbs = bbs,
-                      bb_rename_succ = bb_rename_succ
-            function compute_succs(i)
-                # Look at the original successor
-                orig_bb = result_bbs[i]
-                while merged_succ[orig_bb] != 0
-                    orig_bb = merged_succ[orig_bb]
-                end
-                newsuccs = map(i->bb_rename_succ[i], bbs[orig_bb].succs)
-                return newsuccs
-            end
-            function compute_preds(i)
-                orig_bb = result_bbs[i]
-                preds = bbs[orig_bb].preds
-                newpreds = map(preds) do pred
-                    while merge_into[pred] != 0
-                        pred = merge_into[pred]
-                    end
-                    bb_rename_succ[pred]
-                end
-                return newpreds
-            end
-            BasicBlock[BasicBlock(
-                StmtRange(bb_starts[i], i + 1 > length(bb_starts) ? length(compact.result) : bb_starts[i + 1] - 1),
-                compute_preds(i), compute_succs(i)) for i = 1:length(result_bbs)]
-        end
+
+    # Compute (renamed) successors and predecessors given (renamed) block
+    function compute_succs(i)
+        orig_bb = follow_merged_succ(result_bbs[i])
+        return map(i -> bb_rename_succ[i], bbs[orig_bb].succs)
+    end
+    function compute_preds(i)
+        orig_bb = result_bbs[i]
+        preds = bbs[orig_bb].preds
+        return map(pred -> bb_rename_pred[pred], preds)
+    end
+
+    cresult_bbs = BasicBlock[
+        BasicBlock(StmtRange(bb_starts[i],
+                             i+1 > length(bb_starts) ?
+                                length(compact.result) : bb_starts[i+1]-1),
+                   compute_preds(i),
+                   compute_succs(i))
+        for i = 1:length(result_bbs)]
     compact = IncrementalCompact(ir, true)
     # Run instruction compaction to produce the result,
     # but we're messing with the CFG
@@ -1146,6 +1159,7 @@ function cfg_simplify!(ir::IRCode)
     compact.bb_rename_succ = bb_rename_succ
     compact.bb_rename_pred = bb_rename_pred
     compact.result_bbs = cresult_bbs
+    result_idx = 1
     for (idx, orig_bb) in enumerate(result_bbs)
         ms = orig_bb
         while ms != 0
@@ -1165,7 +1179,6 @@ function cfg_simplify!(ir::IRCode)
             ms = merged_succ[ms]
         end
     end
-
     compact.active_result_bb = length(bb_starts)
     return finish(compact)
 end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index e4b9d970e9293..34bd185eb2e73 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -256,6 +256,30 @@ let m = Meta.@lower 1 + 1
     @test isa(ret_2, Core.Compiler.ReturnNode) && ret_2.val == 2
 end
 
+let m = Meta.@lower 1 + 1
+    # Test that CFG simplify doesn't try to merge every block in a loop into
+    # its predecessor
+    @assert Meta.isexpr(m, :thunk)
+    src = m.args[1]::Core.CodeInfo
+    src.code = Any[
+        # Block 1
+        Core.Compiler.GotoNode(2),
+        # Block 2
+        Core.Compiler.GotoNode(3),
+        # Block 3
+        Core.Compiler.GotoNode(1)
+    ]
+    nstmts = length(src.code)
+    src.ssavaluetypes = nstmts
+    src.codelocs = fill(Int32(1), nstmts)
+    src.ssaflags = fill(Int32(0), nstmts)
+    ir = Core.Compiler.inflate_ir(src)
+    Core.Compiler.verify_ir(ir)
+    ir = Core.Compiler.cfg_simplify!(ir)
+    Core.Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) == 1
+end
+
 # Issue #29213
 function f_29213()
     while true

From 32fbdceb500c0453c6ee83a3db0c3e3c59e320d0 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Tue, 26 Nov 2019 16:34:43 -0500
Subject: [PATCH 04/16] Fix bug in `kill_edge!` which caused edges to be
 removed from the wrong phi nodes

---
 base/compiler/ssair/ir.jl | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 9fb4ff6745188..7a969a88fa444 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -893,10 +893,13 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
             compact.result[last(stmts)][:inst] = ReturnNode()
         end
     else
-        # We need to remove this edge from any phi nodes
+        # Remove this edge from all phi nodes in `to` block
+        # NOTE: It is possible for `to` to contain only `nothing` statements,
+        #       so we must be careful to stop at its last statement
         if to < active_bb
-            idx = first(compact.result_bbs[compact.bb_rename_succ[to]].stmts)
-            while idx < length(compact.result)
+            stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts
+            idx = first(stmts)
+            while idx <= last(stmts)
                 stmt = compact.result[idx][:inst]
                 stmt === nothing && continue
                 isa(stmt, PhiNode) || break
@@ -908,8 +911,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
                 idx += 1
             end
         else
-            idx = first(compact.ir.cfg.blocks[to].stmts)
-            for stmt in CompactPeekIterator(compact, idx)
+            stmts = compact.ir.cfg.blocks[to].stmts
+            for stmt in CompactPeekIterator(compact, first(stmts), last(stmts))
                 stmt === nothing && continue
                 isa(stmt, PhiNode) || break
                 i = findfirst(x-> x === from, stmt.edges)
@@ -1131,10 +1134,19 @@ end
 struct CompactPeekIterator
     compact::IncrementalCompact
     start_idx::Int
+    end_idx::Int
+end
+
+function CompactPeekIterator(compact::IncrementalCompact, start_idx::Int)
+    return CompactPeekIterator(compact, start_idx, 0)
 end
 
 entry_at_idx(entry::NewNodeInfo, idx::Int) = entry.attach_after ? entry.pos == idx - 1 : entry.pos == idx
 function iterate(it::CompactPeekIterator, (idx, aidx, bidx)::NTuple{3, Int}=(it.start_idx, it.compact.new_nodes_idx, 1))
+    if it.end_idx > 0 && idx > it.end_idx
+        return nothing
+    end
+
     # TODO: Take advantage of the fact that these arrays are sorted
     # TODO: this return value design is horrible
     compact = it.compact

From 425939e4a827127581768139c32bd9988470af20 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Sat, 14 Dec 2019 17:21:46 -0500
Subject: [PATCH 05/16] Fix bug in which uses of SSA values processed in
 `just_fixup!` were not counted

This fixes #29253, which was caused by `simple_dce!` erroneously erasing SSA
values that did not appear to be used, because these uses were only discovered
in `just_fixup!` at the end of iterating over an `IncrementalCompact`.
---
 base/compiler/ssair/ir.jl | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 7a969a88fa444..7de77542cb72b 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -283,11 +283,13 @@ function setindex!(x::IRCode, @nospecialize(repl), s::SSAValue)
     return x
 end
 
-
+# SSA values that need renaming
 struct OldSSAValue
     id::Int
 end
 
+# SSA values that are in `new_new_nodes` of an `IncrementalCompact` and are to
+# be actually inserted next time (they become `new_nodes` next time)
 struct NewSSAValue
     id::Int
 end
@@ -824,7 +826,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}
     return values
 end
 
-function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int}, do_rename_ssa::Bool)
+function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, do_rename_ssa::Bool)
     id = val.id
     if id > length(ssanums)
         return val
@@ -833,14 +835,14 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssa::Vector{Int
         val = ssanums[id]
     end
     if isa(val, SSAValue)
-        if used_ssa !== nothing
-            used_ssa[val.id] += 1
+        if used_ssas !== nothing
+            used_ssas[val.id] += 1
         end
     end
     return val
 end
 
-function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool)
+function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool)
     urs = userefs(stmt)
     for op in urs
         val = op[]
@@ -848,7 +850,7 @@ function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssa::Vec
             push!(late_fixup, result_idx)
         end
         if isa(val, SSAValue)
-            val = renumber_ssa2(val, ssanums, used_ssa, do_rename_ssa)
+            val = renumber_ssa2(val, ssanums, used_ssas, do_rename_ssa)
         end
         if isa(val, OldSSAValue) || isa(val, NewSSAValue)
             push!(late_fixup, result_idx)
@@ -1308,10 +1310,17 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt))
         for ur in urs
             val = ur[]
             if isa(val, NewSSAValue)
-                ur[] = SSAValue(length(compact.result) + val.id)
+                val = SSAValue(length(compact.result) + val.id)
             elseif isa(val, OldSSAValue)
-                ur[] = compact.ssa_rename[val.id]
+                val = compact.ssa_rename[val.id]
+            end
+            if isa(val, SSAValue) && val.id <= length(compact.used_ssas)
+                # If `val.id` is greater than the length of `compact.result` or
+                # `compact.used_ssas`, this SSA value is in `new_new_nodes`, so
+                # don't count the use
+                compact.used_ssas[val.id] += 1
             end
+            ur[] = val
         end
         return urs[]
     end

From 9fd642058c65601119acf085cf1d472214d18553 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Sun, 2 Aug 2020 22:08:52 -0700
Subject: [PATCH 06/16] Fix small bug in #36684

PR #36684 changes `iterate(IncrementalCompact)` to return an extra index, but
leaves its arguments unchanged. However, the PR decremented the index argument
in a particular recursive call to `iterate`. This caused `iterate` not to
recognise that it was done when `allow_cfg_transforms` was turned on.
---
 base/compiler/ssair/ir.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 7de77542cb72b..2ed5e6355e399 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -1206,7 +1206,7 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=
         # Move to next block
         compact.idx += 1
         if finish_current_bb!(compact, active_bb, old_result_idx, true)
-            return iterate(compact, (compact.idx-1, active_bb + 1))
+            return iterate(compact, (compact.idx, active_bb + 1))
         else
             return Pair{Pair{Int, Int}, Any}(Pair{Int,Int}(compact.idx-1, old_result_idx), compact.result[old_result_idx][:inst]), (compact.idx, active_bb + 1)
         end

From 524452b0d8bb58e5174b6ba84cc0f48a8e7e8570 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Tue, 15 Oct 2019 13:08:55 -0400
Subject: [PATCH 07/16] Implement algorithm for updating dominator tree with
 CFG edge insertions and deletions

The DFS tree associated with a CFG now keeps track of postorder as well as
preorder numbers. The DFS tree, as well as the state associated with the SNCA
algorithm for finding (immediate) dominators is now stored in DomTree and
reused for Dynamic SNCA.
---
 base/compiler/ssair/domtree.jl | 681 +++++++++++++++++++++------------
 base/compiler/ssair/ir.jl      |  16 +
 base/compiler/ssair/passes.jl  |   2 +-
 test/compiler/ssair.jl         |  84 +++-
 4 files changed, 542 insertions(+), 241 deletions(-)

diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index f9b407f9ddb3e..66a16fc7e9a53 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -1,63 +1,482 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# This file implements the Semi-NCA (SNCA) dominator tree construction
+# described in Georgiadis' PhD thesis [LG05], which itself is a simplification
+# of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches
+# the algorithm choice in LLVM and seems to be a sweet spot in implementation
+# simplicity and efficiency.
+#
+# This file also implements an extension of SNCA that supports updating the
+# dominator tree with insertion and deletion of edges in the control flow
+# graph, described in [GI16] as Dynamic SNCA. DSNCA was chosen over DBS, a
+# different algorithm which achieves the best overall performance in [GI16],
+# because it is simpler to understand and implement, performs well with edge
+# deletions, and is of similar performance overall.
+#
+# SNCA works by first computing semidominators, then computing immediate
+# dominators from them. The semidominator of a node is the node with minimum
+# preorder number such that there is a semidominator path from it to the node.
+# A semidominator path is a path in which the preorder numbers of all nodes not
+# at the endpoints are greater than the preorder number of the last node.
+# Intuitively, the semidominator approximates the immediate dominator of a node
+# by taking the path (in the CFG) that gets as close to the root as possible
+# while avoiding ancestors of the node in the DFS tree.
+#
+# In computing the semidominators, SNCA performs "path compression" whenever a
+# node has a nontrivial semidominator (i.e. a semidominator that is not just
+# its parent in the DFS tree). Path compression propagates the "label" of a
+# node, which represents a possible semidominator with associated semidominator
+# path passing through that node.
+#
+# For example, path compression will be performed for the following CFG, where
+# the edge not in the DFS tree is marked with asterisks. Note that nodes are
+# labeled with their preorder numbers, and all edges point downward.
+#
+#     1
+#     |\
+#     | \
+#     |  4
+#     |  |
+#     2  5
+#     |  |
+#     |  6
+#     | *
+#     |*
+#     3
+#
+# There is a nontrivial semidominator path from 1 to 3, passing through 4, 5,
+# and 6. Stepping through the whole algorithm on paper with an example like
+# this is very helpful for understanding how it works.
+#
+# DSNCA runs the whole algorithm from scratch if the DFS tree is invalidated by
+# the insertion or deletion, but otherwise recomputes a subset of the
+# semidominators (all immediate dominators then need to be recomputed).
+#
+# [LG05]  Linear-Time Algorithms for Dominators and Related Problems
+#         Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
+#         ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
+#
+# [LT79]  A fast algorithm for finding dominators in a flowgraph
+#         Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1
+#         http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf
+#
+# [GI16]  An Experimental Study of Dynamic Dominators
+#         Loukas Georgiadis, Giuseppe F. Italiano, Luigi Laura, Federico
+#         Santaroni, April 2016
+#         https://arxiv.org/abs/1604.02711
+
+# We could make these real structs, but probably not worth the extra
+# overhead. Still, give them names for documentary purposes.
+const BBNumber = Int
+const PreNumber = Int
+const PostNumber = Int
+
+struct DFSTree
+    # These map between BB number and pre- or postorder numbers
+    to_pre::Vector{PreNumber}
+    from_pre::Vector{BBNumber}
+    to_post::Vector{PostNumber}
+    from_post::Vector{BBNumber}
+
+    # Records parent relationships in the DFS tree
+    # (preorder number -> preorder number)
+    # Storing it this way saves a few lookups in the snca_compress! algorithm
+    to_parent_pre::Vector{PreNumber}
+end
+
+function DFSTree(n_blocks::Int)
+    return DFSTree(zeros(PreNumber, n_blocks),
+                   Vector{BBNumber}(undef, n_blocks),
+                   zeros(PostNumber, n_blocks),
+                   Vector{BBNumber}(undef, n_blocks),
+                   zeros(PreNumber, n_blocks))
+end
+
+copy(D::DFSTree) = DFSTree(copy(D.to_pre),
+                           copy(D.from_pre),
+                           copy(D.to_post),
+                           copy(D.from_post),
+                           copy(D.to_parent_pre))
+
+function copy!(dst::DFSTree, src::DFSTree)
+    copy!(dst.to_pre, src.to_pre)
+    copy!(dst.from_pre, src.from_pre)
+    copy!(dst.to_post, src.to_post)
+    copy!(dst.from_post, src.from_post)
+    copy!(dst.to_parent_pre, src.to_parent_pre)
+    return dst
+end
+
+length(D::DFSTree) = length(D.from_pre)
+
+function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
+    copy!(D, DFSTree(length(blocks)))
+    to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
+    pre_num = 1
+    post_num = 1
+    while !isempty(to_visit)
+        # Because we want the postorder number as well as the preorder number,
+        # we don't pop the current node from the stack until we're moving up
+        # the tree
+        (current_node_bb, parent_pre, pushed_children) = to_visit[end]
+
+        if pushed_children
+            # Going up the DFS tree, so all we need to do is record the
+            # postorder number, then move on
+            D.to_post[current_node_bb] = post_num
+            D.from_post[post_num] = current_node_bb
+            post_num += 1
+            pop!(to_visit)
+
+        elseif D.to_pre[current_node_bb] != 0
+            # Node has already been visited, move on
+            pop!(to_visit)
+            continue
+        else
+            # Going down the DFS tree
+
+            # Record preorder number
+            D.to_pre[current_node_bb] = pre_num
+            D.from_pre[pre_num] = current_node_bb
+            D.to_parent_pre[pre_num] = parent_pre
+
+            # Record that children (will) have been pushed
+            to_visit[end] = (current_node_bb, parent_pre, true)
+
+            # Push children to the stack
+            for succ_bb in cfg.blocks[current_node_bb].succs
+                push!(to_visit, (succ_bb, pre_num, false))
+            end
+
+            pre_num += 1
+        end
+    end
+
+    # If all blocks are reachable, this is a no-op, otherwise, we shrink these
+    # arrays.
+    resize!(D.from_pre, pre_num - 1)
+    resize!(D.from_post, post_num - 1) # should be same size as pre_num - 1
+    resize!(D.to_parent_pre, pre_num - 1)
+
+    return D
+end
+
+DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks)
+
+"""
+Keeps the per-BB state of the Semi NCA algorithm. In the original formulation,
+there are three separate length `n` arrays, `label`, `semi` and `ancestor`.
+Instead, for efficiency, we use one array in a array-of-structs style setup.
+"""
+struct SNCAData
+    semi::PreNumber
+    label::PreNumber
+end
+
 "Represents a Basic Block, in the DomTree"
 struct DomTreeNode
     # How deep we are in the DomTree
     level::Int
     # The BB indices in the CFG for all Basic Blocks we immediately dominate
-    children::Vector{Int}
+    children::Vector{BBNumber}
 end
-DomTreeNode() = DomTreeNode(1, Vector{Int}())
+
+DomTreeNode() = DomTreeNode(1, Vector{BBNumber}())
 
 "Data structure that encodes which basic block dominates which."
 struct DomTree
-    # Which basic block immediately dominates each basic block (ordered by BB indices)
-    # Note: this is the inverse of the nodes, children field
-    idoms::Vector{Int}
+    # These can be reused when updating domtree dynamically
+    dfs_tree::DFSTree
+    snca_state::Vector{SNCAData}
+
+    # Which basic block immediately dominates each basic block, using BB indices
+    idoms_bb::Vector{BBNumber}
 
     # The nodes in the tree (ordered by BB indices)
     nodes::Vector{DomTreeNode}
 end
 
+function DomTree()
+    return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
+end
+
+function construct_domtree(cfg::CFG)
+    return update_domtree!(cfg, DomTree(), true, 0)
+end
+
+function update_domtree!(cfg::CFG, domtree::DomTree,
+                         recompute_dfs::Bool, max_pre::PreNumber)
+    if recompute_dfs
+        DFS!(domtree.dfs_tree, blocks)
+    end
+
+    if max_pre == 0
+        max_pre = length(domtree.dfs_tree)
+    end
+
+    SNCA!(domtree, cfg, max_pre)
+    compute_domtree_nodes!(domtree)
+    return domtree
+end
+
+function compute_domtree_nodes!(domtree::DomTree)
+    # Compute children
+    copy!(domtree.nodes,
+          DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)])
+    for (idx, idom) in Iterators.enumerate(domtree.idoms_bb)
+        (idx == 1 || idom == 0) && continue
+        push!(domtree.nodes[idom].children, idx)
+    end
+    # Recursively set level
+    update_level!(domtree.nodes, 1, 1)
+    return domtree.nodes
+end
+
+function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int)
+    worklist = Tuple{BBNumber, Int}[(node, level)]
+    while !isempty(worklist)
+        (node, level) = pop!(worklist)
+        nodes[node] = DomTreeNode(level, nodes[node].children)
+        foreach(nodes[node].children) do child
+            push!(worklist, (child, level+1))
+        end
+    end
+end
+
+"""
+The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05]. Note that the
+pseudocode in [LG05] is not entirely accurate. The best way to understand
+what's happening is to read [LT79], then the description of SLT in [LG05]
+(warning: inconsistent notation), then the description of Semi-NCA.
+"""
+function SNCA!(domtree::DomTree, cfg::CFG, max_pre::PreNumber)
+    D = domtree.dfs_tree
+    state = domtree.snca_state
+    # There may be more blocks than are reachable in the DFS / dominator tree
+    n_blocks = length(cfg.blocks)
+    n_nodes = length(D)
+
+    # `label` is initialized to the identity mapping (though the paper doesn't
+    # make that clear). The rationale for this is Lemma 2.4 in [LG05] (i.e.
+    # Theorem 4 in [LT79]). Note however, that we don't ever look at `semi`
+    # until it is fully initialized, so we could leave it uninitialized here if
+    # we wanted to.
+    resize!(state, n_nodes)
+    for w in 1:max_pre
+        state[w] = SNCAData(typemax(PreNumber), w)
+    end
+
+    # Calculate semidominators, but only for blocks with preorder number up to
+    # max_pre
+    ancestors = copy(D.to_parent_pre)
+    for w::PreNumber in reverse(2:max_pre)
+        # LLVM initializes this to the parent, the paper initializes this to
+        # `w`, but it doesn't really matter (the parent is a predecessor, so at
+        # worst we'll discover it below). Save a memory reference here.
+        semi_w = typemax(PreNumber)
+        last_linked = PreNumber(w + 1)
+        for v ∈ cfg.blocks[D.from_pre[w]].preds
+            # For the purpose of the domtree, ignore virtual predecessors into
+            # catch blocks.
+            v == 0 && continue
+
+            v_pre = D.to_pre[v]
+
+            # Ignore unreachable predecessors
+            v_pre == 0 && continue
+
+            # N.B.: This conditional is missing from the pseudocode in figure
+            # 2.8 of [LG05]. It corresponds to the `ancestor[v] != 0` check in
+            # the `eval` implementation in figure 2.6
+            if v_pre >= last_linked
+                # `v` has already been processed, so perform path compression
+
+                # For performance, if the number of ancestors is small avoid
+                # the extra allocation of the worklist.
+                if length(ancestors) <= 32
+                    snca_compress!(state, ancestors, v_pre, last_linked)
+                else
+                    snca_compress_worklist!(state, ancestors, v_pre, last_linked)
+                end
+            end
+
+            # The (preorder number of the) semidominator of a block is the
+            # minimum over the labels of its predecessors
+            semi_w = min(semi_w, state[v_pre].label)
+        end
+        state[w] = SNCAData(semi_w, semi_w)
+    end
+
+    # Compute immediate dominators, which for a node must be the nearest common
+    # ancestor in the (immediate) dominator tree between its semidominator and
+    # its parent (see Lemma 2.6 in [LG05]).
+    idoms_pre = copy(D.to_parent_pre)
+    for v in 2:n_nodes
+        idom = idoms_pre[v]
+        vsemi = state[v].semi
+        while idom > vsemi
+            idom = idoms_pre[idom]
+        end
+        idoms_pre[v] = idom
+    end
+
+    # Express idoms in BB indexing
+    resize!(domtree.idoms_bb, n_blocks)
+    for i::BBNumber in 1:n_blocks
+        if i == 1 || D.to_pre[i] == 0
+            domtree.idoms_bb[i] = 0
+        else
+            domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]]
+        end
+    end
+end
+
 """
-    Checks if bb1 dominates bb2.
-    bb1 and bb2 are indexes into the CFG blocks.
-    bb1 dominates bb2 if the only way to enter bb2 is via bb1.
-    (Other blocks may be in between, e.g bb1->bbX->bb2).
+Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the
+modification suggested in the paper to use `last_linked` to determine whether
+an ancestor has been processed rather than storing `0` in the ancestor array.
 """
-function dominates(domtree::DomTree, bb1::Int, bb2::Int)
+function snca_compress!(state::Vector{SNCAData}, ancestors::Vector{PreNumber},
+                        v::PreNumber, last_linked::PreNumber)
+    u = ancestors[v]
+    @assert u < v
+    if u >= last_linked
+        snca_compress!(state, ancestors, u, last_linked)
+        if state[u].label < state[v].label
+            state[v] = SNCAData(state[v].semi, state[u].label)
+        end
+        ancestors[v] = ancestors[u]
+    end
+    nothing
+end
+
+function snca_compress_worklist!(
+        state::Vector{SNCAData}, ancestors::Vector{PreNumber},
+        v::PreNumber, last_linked::PreNumber)
+    # TODO: There is a smarter way to do this
+    u = ancestors[v]
+    worklist = Tuple{PreNumber, PreNumber}[(u,v)]
+    @assert u < v
+    while !isempty(worklist)
+        u, v = last(worklist)
+        if u >= last_linked
+            if ancestors[u] >= last_linked
+                push!(worklist, (ancestors[u], u))
+                continue
+            end
+            if state[u].label < state[v].label
+                state[v] = SNCAData(state[v].semi, state[u].label)
+            end
+            ancestors[v] = ancestors[u]
+        end
+        pop!(worklist)
+    end
+end
+
+"Given an updated CFG, update the given dominator tree with an inserted edge."
+function domtree_insert_edge!(domtree::DomTree, cfg::CFG,
+                              from::BBNumber, to::BBNumber)
+    # Implements Section 3.1 of [GI16]
+    dt        = domtree.dfs_tree
+    from_pre  = dt.to_pre[from]
+    to_pre    = dt.to_pre[to]
+    from_post = dt.to_post[from]
+    to_post   = dt.to_post[to]
+    if to_pre == 0 || (from_pre < to_pre && from_post < to_post)
+        # The DFS tree is invalidated by the edge insertion, so run from
+        # scratch
+        update_domtree!(cfg, domtree, true, 0)
+    else
+        # DFS tree is still valid, so update only affected nodes
+        update_domtree!(cfg, domtree, false, to_pre)
+    end
+
+    return domtree
+end
+
+"Given an updated CFG, update the given dominator tree with a deleted edge."
+function domtree_delete_edge!(domtree::DomTree, cfg::CFG,
+                              from::BBNumber, to::BBNumber)
+    # Implements Section 3.1 of [GI16]
+    if is_parent(domtree.dfs_tree, from, to)
+        # The `from` block is the parent of the `to` block in the DFS tree, so
+        # deleting the edge invalidates the DFS tree, so start from scratch
+        update_domtree!(cfg, domtree, true, 0)
+    elseif on_semidominator_path(domtree, from, to)
+        # Recompute semidominators for blocks with preorder number up to that
+        # of `to` block. Semidominators for blocks with preorder number greater
+        # than that of `to` aren't affected because no semidominator path to
+        # the block can pass through the `to` block (the preorder number of
+        # `to` would be lower than those of these blocks, and `to` is not their
+        # parent in the DFS tree).
+        to_pre = domtree.dfs_tree.to_pre[to]
+        update_domtree!(cfg, domtree, false, to_pre)
+    end
+    # Otherwise, dominator tree is not affected
+
+    return domtree
+end
+
+"Check if x is the parent of y in the given DFS tree."
+function is_parent(dfs_tree::DFSTree, x::BBNumber, y::BBNumber)
+    x_pre = dfs_tree.to_pre[x]
+    y_pre = dfs_tree.to_pre[y]
+    return x_pre == dfs_tree.to_parent_pre[y_pre]
+end
+
+"""
+Check if x is on some semidominator path from the semidominator of y to y,
+assuming there is an edge from x to y.
+"""
+function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber)
+    x_pre = domtree.dfs_tree.to_pre[x]
+    y_pre = domtree.dfs_tree.to_pre[y]
+
+    semi_y = domtree.snca_state[y_pre].semi
+    current_block = x_pre
+
+    # Follow the semidominators of `x` up the DFS tree to see if we ever reach
+    # the semidominator of `y`. If so, `x` is on a semidominator path between
+    # `y` and its semidominator. We can stop if the preorder number of the
+    # semidominators becomes less than that of the semidominator of `y`,
+    # because it can only decrease further.
+    while current_block >= semi_y
+        if semi_y == current_block
+            return true
+        end
+        current_block = domtree.snca_state[current_block].semi
+    end
+    return false
+end
+
+"""
+Checks if bb1 dominates bb2.
+bb1 and bb2 are indexes into the CFG blocks.
+bb1 dominates bb2 if the only way to enter bb2 is via bb1.
+(Other blocks may be in between, e.g bb1->bbX->bb2).
+"""
+function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber)
     bb1 == bb2 && return true
     target_level = domtree.nodes[bb1].level
     source_level = domtree.nodes[bb2].level
     source_level < target_level && return false
     for _ in (source_level - 1):-1:target_level
-        bb2 = domtree.idoms[bb2]
+        bb2 = domtree.idoms_bb[bb2]
     end
     return bb1 == bb2
 end
 
-bb_unreachable(domtree::DomTree, bb::Int) = bb != 1 && domtree.nodes[bb].level == 1
-
-function update_level!(domtree::Vector{DomTreeNode}, node::Int, level::Int)
-    worklist = Tuple{Int, Int}[(node, level)]
-    while !isempty(worklist)
-        (node, level) = pop!(worklist)
-        domtree[node] = DomTreeNode(level, domtree[node].children)
-        foreach(domtree[node].children) do child
-            push!(worklist, (child, level+1))
-        end
-    end
-end
+bb_unreachable(domtree::DomTree, bb::BBNumber) = bb != 1 && domtree.dfs_tree.to_pre[bb] == 0
 
 "Iterable data structure that walks though all dominated blocks"
 struct DominatedBlocks
     domtree::DomTree
-    worklist::Vector{Int}
+    worklist::Vector{BBNumber}
 end
 
 "Returns an iterator that walks through all blocks dominated by the basic block at index `root`"
-function dominated(domtree::DomTree, root::Int)
-    doms = DominatedBlocks(domtree, Vector{Int}())
+function dominated(domtree::DomTree, root::BBNumber)
+    doms = DominatedBlocks(domtree, Vector{BBNumber}())
     push!(doms.worklist, root)
     doms
 end
@@ -115,213 +534,3 @@ function naive_idoms(cfg::CFG)
     end
     idoms
 end
-
-# Construct Dom Tree
-function construct_domtree(cfg::CFG)
-    idoms = SNCA(cfg)
-    # Compute children
-    nblocks = length(cfg.blocks)
-    domtree = DomTreeNode[DomTreeNode() for _ = 1:nblocks]
-    for (idx, idom) in Iterators.enumerate(idoms)
-        (idx == 1 || idom == 0) && continue
-        push!(domtree[idom].children, idx)
-    end
-    # Recursively set level
-    update_level!(domtree, 1, 1)
-    DomTree(idoms, domtree)
-end
-
-#================================ [SNCA] ======================================#
-#
-#   This section implements the Semi-NCA (SNCA) dominator tree construction from
-#   described in Georgiadis' PhD thesis [LG05], which itself is a simplification
-#   of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches
-#   the algorithm choice in LLVM and seems to be a sweet spot in implementation
-#   simplicity and efficiency.
-#
-#   [LG05]  Linear-Time Algorithms for Dominators and Related Problems
-#           Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
-#           ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
-#
-#   [LT79]  A fast algorithm for finding dominators in a flowgraph
-#           Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1
-#           http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf
-#
-begin
-    # We could make these real structs, but probably not worth the extra
-    # overhead. Still, give them names for documentary purposes.
-    const BBNumber = UInt
-    const DFSNumber = UInt
-
-    """
-    Keeps the per-BB state of the Semi NCA algorithm. In the original
-    formulation, there are three separate length `n` arrays, `label`, `semi` and
-    `ancestor`. Instead, for efficiency, we use one array in a array-of-structs
-    style setup.
-    """
-    struct Node
-        semi::DFSNumber
-        label::DFSNumber
-    end
-
-    struct DFSTree
-        # Maps DFS number to BB number
-        numbering::Vector{BBNumber}
-        # Maps BB number to DFS number
-        reverse::Vector{DFSNumber}
-        # Records parent relationships in the DFS tree (DFS number -> DFS number)
-        # Storing it this way saves a few lookups in the snca_compress! algorithm
-        parents::Vector{DFSNumber}
-    end
-    length(D::DFSTree) = length(D.numbering)
-    preorder(D::DFSTree) = OneTo(length(D))
-    _drop(xs::AbstractUnitRange, n::Integer) = (first(xs)+n):last(xs)
-
-    function DFSTree(nblocks::Int)
-        DFSTree(
-            Vector{BBNumber}(undef, nblocks),
-            zeros(DFSNumber, nblocks),
-            Vector{DFSNumber}(undef, nblocks))
-    end
-
-    function DFS(cfg::CFG, current_node::BBNumber)::DFSTree
-        dfs = DFSTree(length(cfg.blocks))
-        # TODO: We could reuse the storage in DFSTree for our worklist. We're
-        # guaranteed for the worklist to be smaller than the remaining space in
-        # DFSTree
-        worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)]
-        dfs_num = 1
-        parent = 0
-        while !isempty(worklist)
-            (parent, current_node) = pop!(worklist)
-            dfs.reverse[current_node] != 0 && continue
-            dfs.reverse[current_node] = dfs_num
-            dfs.numbering[dfs_num] = current_node
-            dfs.parents[dfs_num] = parent
-            for succ in cfg.blocks[current_node].succs
-                push!(worklist, (dfs_num, succ))
-            end
-            dfs_num += 1
-        end
-        # If all blocks are reachable, this is a no-op, otherwise,
-        # we shrink these arrays.
-        resize!(dfs.numbering, dfs_num - 1)
-        resize!(dfs.parents, dfs_num - 1)
-        dfs
-    end
-
-    """
-    Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the
-    modification suggested in the paper to use `last_linked` to determine
-    whether an ancestor has been processed rather than storing `0` in the
-    ancestor array.
-    """
-    function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber},
-                            v::DFSNumber, last_linked::DFSNumber)
-        u = ancestors[v]
-        @assert u < v
-        if u >= last_linked
-            snca_compress!(state, ancestors, u, last_linked)
-            if state[u].label < state[v].label
-                state[v] = Node(state[v].semi, state[u].label)
-            end
-            ancestors[v] = ancestors[u]
-        end
-        nothing
-    end
-
-    function snca_compress_worklist!(
-            state::Vector{Node}, ancestors::Vector{DFSNumber},
-            v::DFSNumber, last_linked::DFSNumber)
-        # TODO: There is a smarter way to do this
-        u = ancestors[v]
-        worklist = Tuple{DFSNumber, DFSNumber}[(u,v)]
-        @assert u < v
-        while !isempty(worklist)
-            u, v = last(worklist)
-            if u >= last_linked
-                if ancestors[u] >= last_linked
-                    push!(worklist, (ancestors[u], u))
-                    continue
-                end
-                if state[u].label < state[v].label
-                    state[v] = Node(state[v].semi, state[u].label)
-                end
-                ancestors[v] = ancestors[u]
-            end
-            pop!(worklist)
-        end
-    end
-
-    """
-        SNCA(cfg::CFG)
-
-    Determines a map from basic blocks to the block which immediately dominate them.
-    Expressed as indexes into `cfg.blocks`.
-
-    The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05].
-    Note that the pseudocode in [LG05] is not entirely accurate.
-    The best way to understand what's happening is to read [LT79], then the
-    description of SLT in [LG05] (warning: inconsistent notation), then
-    the description of Semi-NCA.
-    """
-    function SNCA(cfg::CFG)
-        D = DFS(cfg, BBNumber(1))
-        # `label` is initialized to the identity mapping (though
-        # the paper doesn't make that clear). The rational for this is Lemma
-        # 2.4 in [LG05] (i.e. Theorem 4 in ). Note however, that we don't
-        # ever look at `semi` until it is fully initialized, so we could leave
-        # it uninitialized here if we wanted to.
-        state = Node[ Node(typemax(DFSNumber), w) for w in preorder(D) ]
-        # Initialize idoms to parents. Note that while idoms are eventually
-        # BB indexed, we keep it DFS indexed until a final post-processing
-        # pass to avoid extra memory references during the O(N^2) phase below.
-        idoms_dfs = copy(D.parents)
-        # We abuse the parents array as the ancestors array.
-        # Semi-NCA does not look at the parents array at all.
-        # SLT would, but never simultaneously, so we could still
-        # do this.
-        ancestors = D.parents
-        for w::DFSNumber ∈ reverse(_drop(preorder(D), 1))
-            # LLVM initializes this to the parent, the paper initializes this to
-            # `w`, but it doesn't really matter (the parent is a predecessor,
-            # so at worst we'll discover it below). Save a memory reference here.
-            semi_w = typemax(DFSNumber)
-            for v ∈ cfg.blocks[D.numbering[w]].preds
-                # For the purpose of the domtree, ignore virtual predecessors
-                # into catch blocks.
-                v == 0 && continue
-                vdfs = D.reverse[v]
-                # Ignore unreachable predecessors
-                vdfs == 0 && continue
-                last_linked = DFSNumber(w + 1)
-                # N.B.: This conditional is missing from the psuedocode
-                # in figure 2.8 of [LG05]. It corresponds to the
-                # `ancestor[v] != 0` check in the `eval` implementation in
-                # figure 2.6
-                if vdfs >= last_linked
-                    # For performance, if the number of ancestors is small
-                    # avoid the extra allocation of the worklist.
-                    if length(ancestors) <= 32
-                        snca_compress!(state, ancestors, vdfs, last_linked)
-                    else
-                        snca_compress_worklist!(state, ancestors, vdfs, last_linked)
-                    end
-                end
-                semi_w = min(semi_w, state[vdfs].label)
-            end
-            state[w] = Node(semi_w, semi_w)
-        end
-        for v ∈ _drop(preorder(D), 1)
-            idom = idoms_dfs[v]
-            vsemi = state[v].semi
-            while idom > vsemi
-                idom = idoms_dfs[idom]
-            end
-            idoms_dfs[v] = idom
-        end
-        # Reexpress the idom relationship in BB indexing
-        idoms_bb = Int[ (i == 1 || D.reverse[i] == 0) ? 0 : D.numbering[idoms_dfs[D.reverse[i]]] for i = 1:length(cfg.blocks) ]
-        idoms_bb
-    end
-end
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 2ed5e6355e399..8e61abf4fe000 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -36,6 +36,22 @@ struct CFG
                        # TODO: make this O(1) instead of O(log(n_blocks))?
 end
 
+function cfg_insert_edge!(cfg::CFG, from::Int, to::Int)
+    # Assumes that this edge does not already exist
+    push!(cfg.blocks[to].preds, from)
+    push!(cfg.blocks[from].succs, to)
+    nothing
+end
+
+function cfg_delete_edge!(cfg::CFG, from::Int, to::Int)
+    preds = cfg.blocks[to].preds
+    succs = cfg.blocks[from].succs
+    # Assumes that blocks appear at most once in preds and succs
+    deleteat!(preds, findfirst(x->x === from, preds)::Int)
+    deleteat!(succs, findfirst(x->x === to, succs)::Int)
+    nothing
+end
+
 function block_for_inst(index::Vector{Int}, inst::Int)
     return searchsortedfirst(index, inst, lt=(<=))
 end
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index c53e8660d7887..cb315633c1ec6 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -55,7 +55,7 @@ function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int)
     # TODO: This can be much faster by looking at current level and only
     # searching for those blocks in a sorted order
     while !(curblock in allblocks)
-        curblock = domtree.idoms[curblock]
+        curblock = domtree.idoms_bb[curblock]
     end
     return curblock
 end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index e61d4b0f8d5d1..644a988d9d5a3 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -67,10 +67,10 @@ let cfg = CFG(BasicBlock[
     make_bb([2, 3] , [5]   ),
     make_bb([2, 4] , []    ),
 ], Int[])
-    dfs = Compiler.DFS(cfg, Compiler.BBNumber(1))
-    @test dfs.numbering[dfs.parents[dfs.reverse[5]]] == 4
+    dfs = Compiler.DFS(cfg)
+    @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4
     let correct_idoms = Compiler.naive_idoms(cfg)
-        @test Compiler.SNCA(cfg) == correct_idoms
+        @test Compiler.construct_domtree(cfg).idoms_bb == correct_idoms
         # For completeness, reverse the order of pred/succ in the CFG and verify
         # the answer doesn't change (it does change the which node is chosen
         # as the semi-dominator, since it changes the DFS numbering).
@@ -81,7 +81,7 @@ let cfg = CFG(BasicBlock[
                 c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs))
                 d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs))
                 cfg′ = CFG(blocks, cfg.index)
-                @test Compiler.SNCA(cfg′) == correct_idoms
+                @test Compiler.construct_domtree(cfg′).idoms_bb == correct_idoms
             end
         end
     end
@@ -233,3 +233,79 @@ let ci = make_ci([
     ir = Core.Compiler.compact!(ir, true)
     @test Core.Compiler.verify_ir(ir) == nothing
 end
+
+# Test dynamic update of domtree with edge insertions and deletions in the
+# following CFG:
+#
+#     1,1
+#     |  \
+#     |   \
+#     |    3,4 <
+#     |    |    \
+#     2,2  4,5   |
+#     |    |    /
+#     |    6,6 /
+#     |   /
+#     |  /
+#     5,3
+#
+# Nodes indicate BB number, preorder number
+# Edges point down, except the arrow that points up
+let cfg = CFG(BasicBlock[
+        make_bb([],     [3, 2]), # the order of the successors is deliberate
+        make_bb([1],    [5]),    # and is to determine the preorder numbers
+        make_bb([1, 6], [4]),
+        make_bb([3],    [6]),
+        make_bb([2, 6], []),
+        make_bb([4],    [5, 3]),
+    ], Int[])
+    domtree = Compiler.construct_domtree(cfg)
+    @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+
+    # Test removal of edge between a parent and child in the DFS tree, which
+    # should trigger complete recomputation of domtree (first case in algorithm
+    # for removing edge from domtree dynamically)
+    Compiler.cfg_delete_edge!(cfg, 2, 5)
+    Compiler.domtree_delete_edge!(domtree, cfg, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 6, 4]
+    # Add edge back (testing first case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 2, 5)
+    Compiler.domtree_insert_edge!(domtree, cfg, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+
+    # Test second case in algorithm for removing edges from domtree, in which
+    # `from` is on a semidominator path from the semidominator of `to` to `to`
+    Compiler.cfg_delete_edge!(cfg, 6, 5)
+    Compiler.domtree_delete_edge!(domtree, cfg, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 2, 4]
+    # Add edge back (testing second case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 6, 5)
+    Compiler.domtree_insert_edge!(domtree, cfg, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+
+    # Test last case for removing edges, in which edge does not satisfy either
+    # of the above conditions
+    Compiler.cfg_delete_edge!(cfg, 6, 3)
+    Compiler.domtree_delete_edge!(domtree, cfg, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+    # Add edge back (testing second case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 6, 3)
+    Compiler.domtree_insert_edge!(domtree, cfg, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+
+    # Try removing all edges from root
+    Compiler.cfg_delete_edge!(cfg, 1, 2)
+    Compiler.domtree_delete_edge!(domtree, cfg, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 1, 3, 6, 4]
+    Compiler.cfg_delete_edge!(cfg, 1, 3)
+    Compiler.domtree_delete_edge!(domtree, cfg, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 0, 0, 0, 0]
+    # Add edges back
+    Compiler.cfg_insert_edge!(cfg, 1, 2)
+    Compiler.domtree_insert_edge!(domtree, cfg, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 0, 0, 2, 0]
+    Compiler.cfg_insert_edge!(cfg, 1, 3)
+    Compiler.domtree_insert_edge!(domtree, cfg, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+end

From c73f19e640162f26a8dbbc3eb83276c110d129f6 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Mon, 28 Oct 2019 15:39:26 -0400
Subject: [PATCH 08/16] Remove dead blocks as determined by reachability
 instead of number of predecessors

For now, just construct the domtree when we make an `IncrementalCompact` rather
than try to update it (the domtree) incrementally.
---
 base/compiler/ssair/ir.jl | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 8e61abf4fe000..54081db8d28e4 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -553,8 +553,9 @@ mutable struct IncrementalCompact
         if allow_cfg_transforms
             bb_rename = Vector{Int}(undef, length(blocks))
             cur_bb = 1
+            domtree = construct_domtree(blocks)
             for i = 1:length(bb_rename)
-                if i != 1 && length(blocks[i].preds) == 0
+                if bb_unreachable(domtree, i)
                     bb_rename[i] = -1
                 else
                     bb_rename[i] = cur_bb
@@ -1201,8 +1202,8 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=
         resize!(compact, old_result_idx)
     end
     bb = compact.ir.cfg.blocks[active_bb]
-    if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && length(bb.preds) == 0
-        # No predecessors, kill the entire block.
+    if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && compact.bb_rename_succ[active_bb] == -1
+        # Dead block, so kill the entire block.
         compact.idx = last(bb.stmts)
         # Pop any remaining insertion nodes
         while compact.new_nodes_idx <= length(compact.perm)

From 12b34fe85411e6169bee33883a27a9d4f9ba7933 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Mon, 28 Oct 2019 15:30:39 -0400
Subject: [PATCH 09/16] Make domtree construction take `Vector{BasicBlock}` as
 input instead of CFG

This is in anticipation of domtrees being added to CFGs.
---
 base/compiler/ssair/domtree.jl | 40 +++++++++++++-------------
 base/compiler/ssair/driver.jl  |  2 +-
 base/compiler/ssair/passes.jl  |  2 +-
 base/compiler/ssair/verify.jl  |  2 +-
 test/compiler/irpasses.jl      |  4 +--
 test/compiler/ssair.jl         | 52 +++++++++++++++++-----------------
 6 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index 66a16fc7e9a53..c4781da584910 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -144,7 +144,7 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
             to_visit[end] = (current_node_bb, parent_pre, true)
 
             # Push children to the stack
-            for succ_bb in cfg.blocks[current_node_bb].succs
+            for succ_bb in blocks[current_node_bb].succs
                 push!(to_visit, (succ_bb, pre_num, false))
             end
 
@@ -200,11 +200,11 @@ function DomTree()
     return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
 end
 
-function construct_domtree(cfg::CFG)
-    return update_domtree!(cfg, DomTree(), true, 0)
+function construct_domtree(blocks::Vector{BasicBlock})
+    return update_domtree!(blocks, DomTree(), true, 0)
 end
 
-function update_domtree!(cfg::CFG, domtree::DomTree,
+function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree,
                          recompute_dfs::Bool, max_pre::PreNumber)
     if recompute_dfs
         DFS!(domtree.dfs_tree, blocks)
@@ -214,7 +214,7 @@ function update_domtree!(cfg::CFG, domtree::DomTree,
         max_pre = length(domtree.dfs_tree)
     end
 
-    SNCA!(domtree, cfg, max_pre)
+    SNCA!(domtree, blocks, max_pre)
     compute_domtree_nodes!(domtree)
     return domtree
 end
@@ -249,11 +249,11 @@ pseudocode in [LG05] is not entirely accurate. The best way to understand
 what's happening is to read [LT79], then the description of SLT in [LG05]
 (warning: inconsistent notation), then the description of Semi-NCA.
 """
-function SNCA!(domtree::DomTree, cfg::CFG, max_pre::PreNumber)
+function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
     D = domtree.dfs_tree
     state = domtree.snca_state
     # There may be more blocks than are reachable in the DFS / dominator tree
-    n_blocks = length(cfg.blocks)
+    n_blocks = length(blocks)
     n_nodes = length(D)
 
     # `label` is initialized to the identity mapping (though the paper doesn't
@@ -275,7 +275,7 @@ function SNCA!(domtree::DomTree, cfg::CFG, max_pre::PreNumber)
         # worst we'll discover it below). Save a memory reference here.
         semi_w = typemax(PreNumber)
         last_linked = PreNumber(w + 1)
-        for v ∈ cfg.blocks[D.from_pre[w]].preds
+        for v ∈ blocks[D.from_pre[w]].preds
             # For the purpose of the domtree, ignore virtual predecessors into
             # catch blocks.
             v == 0 && continue
@@ -373,8 +373,8 @@ function snca_compress_worklist!(
     end
 end
 
-"Given an updated CFG, update the given dominator tree with an inserted edge."
-function domtree_insert_edge!(domtree::DomTree, cfg::CFG,
+"Given updated blocks, update the given dominator tree with an inserted edge."
+function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock},
                               from::BBNumber, to::BBNumber)
     # Implements Section 3.1 of [GI16]
     dt        = domtree.dfs_tree
@@ -385,23 +385,23 @@ function domtree_insert_edge!(domtree::DomTree, cfg::CFG,
     if to_pre == 0 || (from_pre < to_pre && from_post < to_post)
         # The DFS tree is invalidated by the edge insertion, so run from
         # scratch
-        update_domtree!(cfg, domtree, true, 0)
+        update_domtree!(blocks, domtree, true, 0)
     else
         # DFS tree is still valid, so update only affected nodes
-        update_domtree!(cfg, domtree, false, to_pre)
+        update_domtree!(blocks, domtree, false, to_pre)
     end
 
     return domtree
 end
 
-"Given an updated CFG, update the given dominator tree with a deleted edge."
-function domtree_delete_edge!(domtree::DomTree, cfg::CFG,
+"Given updated blocks, update the given dominator tree with a deleted edge."
+function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock},
                               from::BBNumber, to::BBNumber)
     # Implements Section 3.1 of [GI16]
     if is_parent(domtree.dfs_tree, from, to)
         # The `from` block is the parent of the `to` block in the DFS tree, so
         # deleting the edge invalidates the DFS tree, so start from scratch
-        update_domtree!(cfg, domtree, true, 0)
+        update_domtree!(blocks, domtree, true, 0)
     elseif on_semidominator_path(domtree, from, to)
         # Recompute semidominators for blocks with preorder number up to that
         # of `to` block. Semidominators for blocks with preorder number greater
@@ -410,7 +410,7 @@ function domtree_delete_edge!(domtree::DomTree, cfg::CFG,
         # `to` would be lower than those of these blocks, and `to` is not their
         # parent in the DFS tree).
         to_pre = domtree.dfs_tree.to_pre[to]
-        update_domtree!(cfg, domtree, false, to_pre)
+        update_domtree!(blocks, domtree, false, to_pre)
     end
     # Otherwise, dominator tree is not affected
 
@@ -490,8 +490,8 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing)
     return (bb, nothing)
 end
 
-function naive_idoms(cfg::CFG)
-    nblocks = length(cfg.blocks)
+function naive_idoms(blocks::Vector{BasicBlock})
+    nblocks = length(blocks)
     # The extra +1 helps us detect unreachable blocks below
     dom_all = BitSet(1:nblocks+1)
     dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks]
@@ -499,10 +499,10 @@ function naive_idoms(cfg::CFG)
     while changed
         changed = false
         for n = 2:nblocks
-            if isempty(cfg.blocks[n].preds)
+            if isempty(blocks[n].preds)
                 continue
             end
-            firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, cfg.blocks[n].preds))
+            firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))
             new_doms = copy(dominators[firstp])
             for p in rest
                 intersect!(new_doms, dominators[p])
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index cb8e9ce01d902..cd4215a97de08 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -111,7 +111,7 @@ end
 
 function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState)
     # need `ci` for the slot metadata, IR for the code
-    @timeit "domtree 1" domtree = construct_domtree(ir.cfg)
+    @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
     defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
     @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir`
     return ir
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index cb315633c1ec6..107a6cf389b34 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -726,7 +726,7 @@ function getfield_elim_pass!(ir::IRCode)
     # IR. This needs to be after we iterate through the IR with
     # `IncrementalCompact` because removing dead blocks can invalidate the
     # domtree.
-    @timeit "domtree 2" domtree = construct_domtree(ir.cfg)
+    @timeit "domtree 2" domtree = construct_domtree(ir.cfg.blocks)
 
     # Now go through any mutable structs and see which ones we can eliminate
     for (idx, (intermediaries, defuse)) in defuses
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index 67c03184d4ad5..b169018517bd7 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -66,7 +66,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
     # Verify CFG
     last_end = 0
     # Verify statements
-    domtree = construct_domtree(ir.cfg)
+    domtree = construct_domtree(ir.cfg.blocks)
     for (idx, block) in pairs(ir.cfg.blocks)
         if first(block.stmts) != last_end + 1
             #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)]
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index 34bd185eb2e73..c6190b720c5be 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -34,7 +34,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src)
     Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg)
+    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
     ir = Core.Compiler.domsort_ssa!(ir, domtree)
     Core.Compiler.verify_ir(ir)
     phi = ir.stmts.inst[3]
@@ -62,7 +62,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src)
     Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg)
+    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
     ir = Core.Compiler.domsort_ssa!(ir, domtree)
     Core.Compiler.verify_ir(ir)
 end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 644a988d9d5a3..79e8c06ce26e1 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -67,10 +67,10 @@ let cfg = CFG(BasicBlock[
     make_bb([2, 3] , [5]   ),
     make_bb([2, 4] , []    ),
 ], Int[])
-    dfs = Compiler.DFS(cfg)
+    dfs = Compiler.DFS(cfg.blocks)
     @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4
-    let correct_idoms = Compiler.naive_idoms(cfg)
-        @test Compiler.construct_domtree(cfg).idoms_bb == correct_idoms
+    let correct_idoms = Compiler.naive_idoms(cfg.blocks)
+        @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms
         # For completeness, reverse the order of pred/succ in the CFG and verify
         # the answer doesn't change (it does change the which node is chosen
         # as the semi-dominator, since it changes the DFS numbering).
@@ -81,7 +81,7 @@ let cfg = CFG(BasicBlock[
                 c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs))
                 d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs))
                 cfg′ = CFG(blocks, cfg.index)
-                @test Compiler.construct_domtree(cfg′).idoms_bb == correct_idoms
+                @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms
             end
         end
     end
@@ -259,53 +259,53 @@ let cfg = CFG(BasicBlock[
         make_bb([2, 6], []),
         make_bb([4],    [5, 3]),
     ], Int[])
-    domtree = Compiler.construct_domtree(cfg)
+    domtree = Compiler.construct_domtree(cfg.blocks)
     @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Test removal of edge between a parent and child in the DFS tree, which
     # should trigger complete recomputation of domtree (first case in algorithm
     # for removing edge from domtree dynamically)
     Compiler.cfg_delete_edge!(cfg, 2, 5)
-    Compiler.domtree_delete_edge!(domtree, cfg, 2, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 6, 4]
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4]
     # Add edge back (testing first case for insertion)
     Compiler.cfg_insert_edge!(cfg, 2, 5)
-    Compiler.domtree_insert_edge!(domtree, cfg, 2, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Test second case in algorithm for removing edges from domtree, in which
     # `from` is on a semidominator path from the semidominator of `to` to `to`
     Compiler.cfg_delete_edge!(cfg, 6, 5)
-    Compiler.domtree_delete_edge!(domtree, cfg, 6, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 2, 4]
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4]
     # Add edge back (testing second case for insertion)
     Compiler.cfg_insert_edge!(cfg, 6, 5)
-    Compiler.domtree_insert_edge!(domtree, cfg, 6, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Test last case for removing edges, in which edge does not satisfy either
     # of the above conditions
     Compiler.cfg_delete_edge!(cfg, 6, 3)
-    Compiler.domtree_delete_edge!(domtree, cfg, 6, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
     # Add edge back (testing second case for insertion)
     Compiler.cfg_insert_edge!(cfg, 6, 3)
-    Compiler.domtree_insert_edge!(domtree, cfg, 6, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Try removing all edges from root
     Compiler.cfg_delete_edge!(cfg, 1, 2)
-    Compiler.domtree_delete_edge!(domtree, cfg, 1, 2)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 1, 3, 6, 4]
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4]
     Compiler.cfg_delete_edge!(cfg, 1, 3)
-    Compiler.domtree_delete_edge!(domtree, cfg, 1, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 0, 0, 0, 0, 0]
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0]
     # Add edges back
     Compiler.cfg_insert_edge!(cfg, 1, 2)
-    Compiler.domtree_insert_edge!(domtree, cfg, 1, 2)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 0, 0, 2, 0]
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0]
     Compiler.cfg_insert_edge!(cfg, 1, 3)
-    Compiler.domtree_insert_edge!(domtree, cfg, 1, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg) == [0, 1, 1, 3, 1, 4]
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 end

From 88534d5890505e15da3e77f36915c47efefafede Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Wed, 30 Oct 2019 18:02:53 -0400
Subject: [PATCH 10/16] Add a domtree to every CFG, avoiding constructing them
 explicitly when needed

Every time a CFG is created, its corresponding dominator tree is as well.
---
 base/compiler/ssair/domtree.jl  | 13 +++++++++---
 base/compiler/ssair/driver.jl   |  3 +--
 base/compiler/ssair/ir.jl       | 14 +++++++++++--
 base/compiler/ssair/passes.jl   | 27 +++++++++---------------
 base/compiler/ssair/slot2ssa.jl | 22 ++++++++++----------
 base/compiler/ssair/verify.jl   | 25 +++++++++++++++-------
 test/compiler/irpasses.jl       |  6 ++----
 test/compiler/ssair.jl          | 37 ++++++++++++---------------------
 8 files changed, 77 insertions(+), 70 deletions(-)

diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index c4781da584910..310e76a463666 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -173,6 +173,8 @@ struct SNCAData
     label::PreNumber
 end
 
+copy(d::SNCAData) = SNCAData(d.semi, d.label)
+
 "Represents a Basic Block, in the DomTree"
 struct DomTreeNode
     # How deep we are in the DomTree
@@ -183,6 +185,8 @@ end
 
 DomTreeNode() = DomTreeNode(1, Vector{BBNumber}())
 
+copy(n::DomTreeNode) = DomTreeNode(n.level, copy(n.children))
+
 "Data structure that encodes which basic block dominates which."
 struct DomTree
     # These can be reused when updating domtree dynamically
@@ -196,9 +200,12 @@ struct DomTree
     nodes::Vector{DomTreeNode}
 end
 
-function DomTree()
-    return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
-end
+DomTree() = DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
+
+copy(d::DomTree) = DomTree(copy(d.dfs_tree),
+                           copy(d.snca_state),
+                           copy(d.idoms_bb),
+                           copy(d.nodes))
 
 function construct_domtree(blocks::Vector{BasicBlock})
     return update_domtree!(blocks, DomTree(), true, 0)
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index cd4215a97de08..f5d74bd1effc3 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -111,9 +111,8 @@ end
 
 function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState)
     # need `ci` for the slot metadata, IR for the code
-    @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
     defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
-    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir`
+    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir`
     return ir
 end
 
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 54081db8d28e4..fb9796aa868ab 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -34,12 +34,22 @@ struct CFG
     blocks::Vector{BasicBlock}
     index::Vector{Int} # map from instruction => basic-block number
                        # TODO: make this O(1) instead of O(log(n_blocks))?
+    domtree
 end
 
+function CFG(blocks::Vector{BasicBlock}, index::Vector{Int})
+    return CFG(blocks, index, construct_domtree(blocks))
+end
+
+copy(c::CFG) = CFG(BasicBlock[copy(b) for b in c.blocks],
+                   copy(c.index),
+                   copy(c.domtree))
+
 function cfg_insert_edge!(cfg::CFG, from::Int, to::Int)
     # Assumes that this edge does not already exist
     push!(cfg.blocks[to].preds, from)
     push!(cfg.blocks[from].succs, to)
+    domtree_insert_edge!(cfg.domtree, cfg.blocks, from, to)
     nothing
 end
 
@@ -49,6 +59,7 @@ function cfg_delete_edge!(cfg::CFG, from::Int, to::Int)
     # Assumes that blocks appear at most once in preds and succs
     deleteat!(preds, findfirst(x->x === from, preds)::Int)
     deleteat!(succs, findfirst(x->x === to, succs)::Int)
+    domtree_delete_edge!(cfg.domtree, cfg.blocks, from, to)
     nothing
 end
 
@@ -553,9 +564,8 @@ mutable struct IncrementalCompact
         if allow_cfg_transforms
             bb_rename = Vector{Int}(undef, length(blocks))
             cur_bb = 1
-            domtree = construct_domtree(blocks)
             for i = 1:length(bb_rename)
-                if bb_unreachable(domtree, i)
+                if bb_unreachable(code.cfg.domtree, i)
                     bb_rename[i] = -1
                 else
                     bb_rename[i] = cur_bb
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 107a6cf389b34..da675f60b0b3e 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -69,8 +69,8 @@ function val_for_def_expr(ir::IRCode, def::Int, fidx::Int)
     end
 end
 
-function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int)
-    curblock = find_curblock(domtree, allblocks, curblock)
+function compute_value_for_block(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, curblock::Int)
+    curblock = find_curblock(ir.cfg.domtree, allblocks, curblock)
     def = 0
     for stmt in du.defs
         if block_for_inst(ir.cfg, stmt) == curblock
@@ -80,10 +80,10 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector
     def == 0 ? phinodes[curblock] : val_for_def_expr(ir, def, fidx)
 end
 
-function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int)
+function compute_value_for_use(ir::IRCode, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int)
     # Find the first dominating def
     curblock = stmtblock = block_for_inst(ir.cfg, use_idx)
-    curblock = find_curblock(domtree, allblocks, curblock)
+    curblock = find_curblock(ir.cfg.domtree, allblocks, curblock)
     defblockdefs = Int[stmt for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock]
     def = 0
     if !isempty(defblockdefs)
@@ -105,7 +105,7 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I
         if !haskey(phinodes, curblock)
             # If this happens, we need to search the predecessors for defs. Which
             # one doesn't matter - if it did, we'd have had a phinode
-            return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds))
+            return compute_value_for_block(ir, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds))
         end
         # The use is the phinode
         return phinodes[curblock]
@@ -721,13 +721,6 @@ function getfield_elim_pass!(ir::IRCode)
     used_ssas = copy(compact.used_ssas)
     simple_dce!(compact)
     ir = complete(compact)
-
-    # Compute domtree, needed below, now that we have finished compacting the
-    # IR. This needs to be after we iterate through the IR with
-    # `IncrementalCompact` because removing dead blocks can invalidate the
-    # domtree.
-    @timeit "domtree 2" domtree = construct_domtree(ir.cfg.blocks)
-
     # Now go through any mutable structs and see which ones we can eliminate
     for (idx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
@@ -794,7 +787,7 @@ function getfield_elim_pass!(ir::IRCode)
                 ldu = compute_live_ins(ir.cfg, du)
                 phiblocks = Int[]
                 if !isempty(ldu.live_in_bbs)
-                    phiblocks = idf(ir.cfg, ldu, domtree)
+                    phiblocks = idf(ir.cfg, ldu)
                 end
                 phinodes = IdDict{Int, SSAValue}()
                 for b in phiblocks
@@ -804,19 +797,19 @@ function getfield_elim_pass!(ir::IRCode)
                 # Now go through all uses and rewrite them
                 allblocks = sort(vcat(phiblocks, ldu.def_bbs))
                 for stmt in du.uses
-                    ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt)
+                    ir[SSAValue(stmt)] = compute_value_for_use(ir, allblocks, du, phinodes, fidx, stmt)
                 end
                 if !isbitstype(fieldtype(typ, fidx))
                     for (use, list) in preserve_uses
-                        push!(list, compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use))
+                        push!(list, compute_value_for_use(ir, allblocks, du, phinodes, fidx, use))
                     end
                 end
                 for b in phiblocks
                     for p in ir.cfg.blocks[b].preds
                         n = ir[phinodes[b]]
                         push!(n.edges, p)
-                        push!(n.values, compute_value_for_block(ir, domtree,
-                            allblocks, du, phinodes, fidx, p))
+                        push!(n.values, compute_value_for_block(
+                            ir, allblocks, du, phinodes, fidx, p))
                     end
                 end
             end
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 5bea1c871610e..b0fad48c57ec7 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -271,10 +271,10 @@ end
 # subset of those backedges from a subtree rooted at `A` (out outside the subtree rooted
 # at `A`). Note however that this does not work the other way. Thus, the algorithm
 # needs to make sure that we always visit `B` before `A`.
-function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
+function idf(cfg::CFG, liveness::BlockLiveness)
     # This should be a priority queue, but TODO - sorted array for now
     defs = liveness.def_bbs
-    pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)]
+    pq = Tuple{Int, Int}[(defs[i], cfg.domtree.nodes[defs[i]].level) for i in 1:length(defs)]
     sort!(pq, by=x->x[2])
     phiblocks = Int[]
     # This bitset makes sure we only add a phi node to a given block once.
@@ -298,7 +298,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
                 # since at this point we know that there is an edge from `node`'s
                 # subtree to `succ`, we know that if succ's level is greater than
                 # that of `node`, it must be dominated by `node`.
-                succ_level = domtree.nodes[succ].level
+                succ_level = cfg.domtree.nodes[succ].level
                 succ_level > level && continue
                 # We don't dominate succ. We need to place a phinode,
                 # unless liveness said otherwise.
@@ -319,7 +319,7 @@ function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
                 end
             end
             # Recurse down the current subtree
-            for child in domtree.nodes[active].children
+            for child in cfg.domtree.nodes[active].children
                 child in visited && continue
                 push!(visited, child)
                 push!(worklist, child)
@@ -374,7 +374,7 @@ end
     RPO traversal and in particular, any use of an SSA value must come after (by linear
     order) its definition.
 """
-function domsort_ssa!(ir::IRCode, domtree::DomTree)
+function domsort_ssa!(ir::IRCode)
     # First compute the new order of basic blocks
     result_order = Int[]
     stack = Int[]
@@ -383,7 +383,7 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
     nnewfallthroughs = 0
     while node !== -1
         push!(result_order, node)
-        cs = domtree.nodes[node].children
+        cs = ir.cfg.domtree.nodes[node].children
         terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst]
         iscondbr = isa(terminator, GotoIfNot)
         let old_node = node + 1
@@ -579,8 +579,8 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode
     return new_typ
 end
 
-function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int, sptypes::Vector{Any},
-                        slottypes::Vector{Any})
+function construct_ssa!(ci::CodeInfo, ir::IRCode, defuse, nargs::Int,
+                        sptypes::Vector{Any}, slottypes::Vector{Any})
     code = ir.stmts.inst
     cfg = ir.cfg
     left = Int[]
@@ -597,7 +597,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
     for (enter_block, exc) in catch_entry_blocks
         exc_handlers[enter_block+1] = (enter_block, exc)
         # TODO: Cut off here if the terminator is a leave corresponding to this enter
-        for block in dominated(domtree, enter_block+1)
+        for block in dominated(cfg.domtree, enter_block+1)
             exc_handlers[block] = (enter_block, exc)
         end
     end
@@ -649,7 +649,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 end
             end
         end
-        phiblocks = idf(cfg, live, domtree)
+        phiblocks = idf(cfg, live)
         for block in phiblocks
             push!(phi_slots[block], idx)
             node = PhiNode()
@@ -893,6 +893,6 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         local node = new_nodes.stmts[i]
         node[:inst] = new_to_regular(renumber_ssa!(node[:inst], ssavalmap), nstmts)
     end
-    @timeit "domsort" ir = domsort_ssa!(ir, domtree)
+    @timeit "domsort" ir = domsort_ssa!(ir)
     return ir
 end
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index b169018517bd7..6f7069f406747 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -11,7 +11,7 @@ if !isdefined(@__MODULE__, Symbol("@verify_error"))
     end
 end
 
-function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool)
+function check_op(ir::IRCode, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool)
     if isa(op, SSAValue)
         if op.id > length(ir.stmts)
             def_bb = block_for_inst(ir.cfg, ir.new_nodes[op.id - length(ir.stmts)].pos)
@@ -28,7 +28,7 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int,
                 end
             end
         else
-            if !dominates(domtree, def_bb, use_bb) && !(bb_unreachable(domtree, def_bb) && bb_unreachable(domtree, use_bb))
+            if !dominates(ir.cfg.domtree, def_bb, use_bb) && !(bb_unreachable(ir.cfg.domtree, def_bb) && bb_unreachable(ir.cfg.domtree, use_bb))
                 # At the moment, we allow GC preserve tokens outside the standard domination notion
                 #@Base.show ir
                 @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))"
@@ -61,12 +61,23 @@ function count_int(val::Int, arr::Vector{Int})
 end
 
 function verify_ir(ir::IRCode, print::Bool=true)
+    # Verify domtree
+    if ir.cfg.domtree.idoms_bb != construct_domtree(ir.cfg.blocks).idoms_bb
+        # Dynamically updating the domtree made it incorrect?
+        @verify_error "Dominator tree does not match recomputed one"
+        error()
+    end
+    # Verify domtree by comparing with naive algorithm, but only if there
+    # aren't too many blocks for it to handle
+    if length(ir.cfg.blocks) < 1000 && ir.cfg.domtree.idoms_bb != naive_idoms(ir.cfg.blocks)
+        @verify_error "Dominator tree does not match immediate dominators computed by naive algorithm"
+        error()
+    end
     # For now require compact IR
     # @assert isempty(ir.new_nodes)
     # Verify CFG
     last_end = 0
     # Verify statements
-    domtree = construct_domtree(ir.cfg.blocks)
     for (idx, block) in pairs(ir.cfg.blocks)
         if first(block.stmts) != last_end + 1
             #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)]
@@ -76,7 +87,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
         last_end = last(block.stmts)
         terminator = ir.stmts[last_end][:inst]
 
-        bb_unreachable(domtree, idx) && continue
+        bb_unreachable(ir.cfg.domtree, idx) && continue
         for p in block.preds
             p == 0 && continue
             c = count_int(idx, ir.cfg.blocks[p].succs)
@@ -144,7 +155,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
     for (bb, idx) in bbidxiter(ir)
         # We allow invalid IR in dead code to avoid passes having to detect when
         # they're generating dead code.
-        bb_unreachable(domtree, bb) && continue
+        bb_unreachable(ir.cfg.domtree, bb) && continue
         stmt = ir.stmts[idx][:inst]
         stmt === nothing && continue
         if isa(stmt, PhiNode)
@@ -174,7 +185,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
                     @verify_error "GlobalRefs and Exprs are not allowed as PhiNode values"
                     error()
                 end
-                check_op(ir, domtree, val, edge, last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print)
+                check_op(ir, val, edge, last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print)
             end
         elseif isa(stmt, PhiCNode)
             for i = 1:length(stmt.values)
@@ -211,7 +222,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
             end
             for op in userefs(stmt)
                 op = op[]
-                check_op(ir, domtree, op, bb, idx, print)
+                check_op(ir, op, bb, idx, print)
             end
         end
     end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index c6190b720c5be..afed12c2d6fc1 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -34,8 +34,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src)
     Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
-    ir = Core.Compiler.domsort_ssa!(ir, domtree)
+    ir = Core.Compiler.domsort_ssa!(ir)
     Core.Compiler.verify_ir(ir)
     phi = ir.stmts.inst[3]
     @test isa(phi, Core.PhiNode) && length(phi.edges) == 1
@@ -62,8 +61,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src)
     Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
-    ir = Core.Compiler.domsort_ssa!(ir, domtree)
+    ir = Core.Compiler.domsort_ssa!(ir)
     Core.Compiler.verify_ir(ir)
 end
 
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 79e8c06ce26e1..7f15f49045e48 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -70,7 +70,7 @@ let cfg = CFG(BasicBlock[
     dfs = Compiler.DFS(cfg.blocks)
     @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4
     let correct_idoms = Compiler.naive_idoms(cfg.blocks)
-        @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms
+        @test cfg.domtree.idoms_bb == correct_idoms
         # For completeness, reverse the order of pred/succ in the CFG and verify
         # the answer doesn't change (it does change the which node is chosen
         # as the semi-dominator, since it changes the DFS numbering).
@@ -259,53 +259,42 @@ let cfg = CFG(BasicBlock[
         make_bb([2, 6], []),
         make_bb([4],    [5, 3]),
     ], Int[])
-    domtree = Compiler.construct_domtree(cfg.blocks)
-    @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    @test cfg.domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Test removal of edge between a parent and child in the DFS tree, which
     # should trigger complete recomputation of domtree (first case in algorithm
     # for removing edge from domtree dynamically)
     Compiler.cfg_delete_edge!(cfg, 2, 5)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4]
     # Add edge back (testing first case for insertion)
     Compiler.cfg_insert_edge!(cfg, 2, 5)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Test second case in algorithm for removing edges from domtree, in which
     # `from` is on a semidominator path from the semidominator of `to` to `to`
     Compiler.cfg_delete_edge!(cfg, 6, 5)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4]
     # Add edge back (testing second case for insertion)
     Compiler.cfg_insert_edge!(cfg, 6, 5)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Test last case for removing edges, in which edge does not satisfy either
     # of the above conditions
     Compiler.cfg_delete_edge!(cfg, 6, 3)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
     # Add edge back (testing second case for insertion)
     Compiler.cfg_insert_edge!(cfg, 6, 3)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 
     # Try removing all edges from root
     Compiler.cfg_delete_edge!(cfg, 1, 2)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4]
     Compiler.cfg_delete_edge!(cfg, 1, 3)
-    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0]
     # Add edges back
     Compiler.cfg_insert_edge!(cfg, 1, 2)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0]
     Compiler.cfg_insert_edge!(cfg, 1, 3)
-    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
-    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 end

From 07f5c08258abaadea463f877a76fc8edd79b102f Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Mon, 18 Nov 2019 11:24:27 -0500
Subject: [PATCH 11/16] Update domtrees dynamically when CFGs get modified, and
 fix bug in dynamic domtree implementation

---
 base/compiler/ssair/domtree.jl  |  91 ++++++++++++++++++++++
 base/compiler/ssair/driver.jl   |   1 -
 base/compiler/ssair/inlining.jl |  10 ++-
 base/compiler/ssair/ir.jl       | 132 ++++++++++++++++++--------------
 base/compiler/ssair/slot2ssa.jl |  13 +++-
 5 files changed, 183 insertions(+), 64 deletions(-)

diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index 310e76a463666..610d852e02f95 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -270,9 +270,28 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
     # we wanted to.
     resize!(state, n_nodes)
     for w in 1:max_pre
+        # Only reset semidominators for nodes we want to recompute
         state[w] = SNCAData(typemax(PreNumber), w)
     end
 
+    # If we are only recomputing some of the semidominators, the remaining
+    # labels should be reset, because they may have become inapplicable to the
+    # node/semidominator we are currently processing/recomputing. They can
+    # become inapplicable because of path compressions that were triggered by
+    # nodes that should only be processed after the current one (but were
+    # processed the last time `SNCA!` was run).
+    #
+    # So, for every node that is not being reprocessed, we reset its label to
+    # its semidominator, which is the value that its label assumes once its
+    # semidominator is computed. If this was too conservative, i.e. if the
+    # label would have been updated before we process the current node in a
+    # situation where all semidominators were recomputed, then path compression
+    # will produce the correct label.
+    for w in max_pre+1:n_nodes
+        semi = state[w].semi
+        state[w] = SNCAData(semi, semi)
+    end
+
     # Calculate semidominators, but only for blocks with preorder number up to
     # max_pre
     ancestors = copy(D.to_parent_pre)
@@ -383,6 +402,11 @@ end
 "Given updated blocks, update the given dominator tree with an inserted edge."
 function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock},
                               from::BBNumber, to::BBNumber)
+    # `from` is unreachable, so `from` and `to` aren't in domtree
+    if bb_unreachable(domtree, from)
+        return domtree
+    end
+
     # Implements Section 3.1 of [GI16]
     dt        = domtree.dfs_tree
     from_pre  = dt.to_pre[from]
@@ -404,6 +428,11 @@ end
 "Given updated blocks, update the given dominator tree with a deleted edge."
 function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock},
                               from::BBNumber, to::BBNumber)
+    # `from` is unreachable, so `from` and `to` aren't in domtree
+    if bb_unreachable(domtree, from)
+        return domtree
+    end
+
     # Implements Section 3.1 of [GI16]
     if is_parent(domtree.dfs_tree, from, to)
         # The `from` block is the parent of the `to` block in the DFS tree, so
@@ -456,6 +485,68 @@ function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber)
     return false
 end
 
+"""
+Rename basic block numbers in a dominator tree, removing the block if it is
+renamed to -1.
+"""
+function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber})
+    # Rename DFS tree
+    rename_nodes!(domtree.dfs_tree, rename_bb)
+
+    # `snca_state` is indexed by preorder number, so should be unchanged
+
+    # Rename `idoms_bb` and `nodes`
+    old_idoms_bb = copy(domtree.idoms_bb)
+    old_nodes = copy(domtree.nodes)
+    for (old_bb, new_bb) in enumerate(rename_bb)
+        if new_bb != -1
+            domtree.idoms_bb[new_bb] = (new_bb == 1) ?
+                0 : rename_bb[old_idoms_bb[old_bb]]
+            domtree.nodes[new_bb] = old_nodes[old_bb]
+            map!(i -> rename_bb[i],
+                 domtree.nodes[new_bb].children,
+                 domtree.nodes[new_bb].children)
+        end
+    end
+
+    # length of `to_pre` after renaming DFS tree is new number of basic blocks
+    resize!(domtree.idoms_bb, length(domtree.dfs_tree.to_pre))
+    resize!(domtree.nodes, length(domtree.dfs_tree.to_pre))
+    return domtree
+end
+
+"""
+Rename basic block numbers in a DFS tree, removing the block if it is renamed
+to -1.
+"""
+function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber})
+    n_blocks = length(D.to_pre)
+    n_reachable_blocks = length(D.from_pre)
+
+    old_to_pre = copy(D.to_pre)
+    old_from_pre = copy(D.from_pre)
+    old_to_post = copy(D.to_post)
+    old_from_post = copy(D.from_post)
+    max_new_bb = 0
+    for (old_bb, new_bb) in enumerate(rename_bb)
+        if new_bb != -1
+            D.to_pre[new_bb] = old_to_pre[old_bb]
+            D.from_pre[old_to_pre[old_bb]] = new_bb
+            D.to_post[new_bb] = old_to_post[old_bb]
+            D.from_post[old_to_post[old_bb]] = new_bb
+
+            # Keep track of highest BB number to resize arrays with
+            if new_bb > max_new_bb
+                max_new_bb = new_bb
+            end
+        end
+    end
+    resize!(D.to_pre, max_new_bb)
+    resize!(D.to_post, max_new_bb)
+    # `to_parent_pre` should be unchanged
+    return D
+end
+
 """
 Checks if bb1 dominates bb2.
 bb1 and bb2 are indexes into the CFG blocks.
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index f5d74bd1effc3..5a1bfcb28a530 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -121,7 +121,6 @@ function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState)
     ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, nargs, sv)
     ir = slot2reg(ir, ci, nargs, sv)
     #@Base.show ("after_construct", ir)
-    # TODO: Domsorting can produce an updated domtree - no need to recompute here
     @timeit "compact 1" ir = compact!(ir)
     @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv)
     #@timeit "verify 2" verify_ir(ir)
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 926491223795c..4b9b7f5fd41e4 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -298,6 +298,7 @@ function finish_cfg_inline!(state::CFGInliningState)
     end
 end
 
+# NOTE: The domtree is not kept up-to-date with changes this makes
 function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any},
                          linetable::Vector{LineInfoNode}, item::InliningTodo,
                          boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
@@ -415,6 +416,7 @@ end
 
 const fatal_type_bound_error = ErrorException("fatal error in type inference (type bound)")
 
+# NOTE: The domtree is not kept up-to-date with changes this makes
 function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
                                argexprs::Vector{Any}, linetable::Vector{LineInfoNode},
                                item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
@@ -497,7 +499,9 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
 end
 
 function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool)
-    # Compute the new CFG first (modulo statement ranges, which will be computed below)
+    # Compute the new CFG first (modulo statement ranges, which will be
+    # computed below, and the domtree, which will be updated below before we
+    # iterate through the statements)
     state = CFGInliningState(ir)
     for item in todo
         if isa(item, UnionSplit)
@@ -518,6 +522,9 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo
 
     let compact = IncrementalCompact(ir, false)
         compact.result_bbs = state.new_cfg_blocks
+        # Recompute the domtree now that the CFG has been modified
+        compact.result_domtree = construct_domtree(compact.result_bbs)
+
         # This needs to be a minimum and is more of a size hint
         nn = 0
         for item in todo
@@ -572,7 +579,6 @@ function batch_inline!(todo::Vector{Any}, ir::IRCode, linetable::Vector{LineInfo
                 compact[idx] = PhiNode(Any[edge == length(state.bb_rename) ? length(state.new_cfg_blocks) : state.bb_rename[edge+1]-1 for edge in stmt.edges], stmt.values)
             end
         end
-
         ir = finish(compact)
     end
     return ir
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index fb9796aa868ab..f13958f22afdb 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -34,7 +34,7 @@ struct CFG
     blocks::Vector{BasicBlock}
     index::Vector{Int} # map from instruction => basic-block number
                        # TODO: make this O(1) instead of O(log(n_blocks))?
-    domtree
+    domtree # TODO type
 end
 
 function CFG(blocks::Vector{BasicBlock}, index::Vector{Int})
@@ -54,11 +54,7 @@ function cfg_insert_edge!(cfg::CFG, from::Int, to::Int)
 end
 
 function cfg_delete_edge!(cfg::CFG, from::Int, to::Int)
-    preds = cfg.blocks[to].preds
-    succs = cfg.blocks[from].succs
-    # Assumes that blocks appear at most once in preds and succs
-    deleteat!(preds, findfirst(x->x === from, preds)::Int)
-    deleteat!(succs, findfirst(x->x === to, succs)::Int)
+    kill_edge!(cfg.blocks, from, to)
     domtree_delete_edge!(cfg.domtree, cfg.blocks, from, to)
     nothing
 end
@@ -529,6 +525,7 @@ mutable struct IncrementalCompact
     ir::IRCode
     result::InstructionStream
     result_bbs::Vector{BasicBlock}
+    result_domtree # TODO type
 
     ssa_rename::Vector{Any}
     bb_rename_pred::Vector{Int}
@@ -591,18 +588,22 @@ mutable struct IncrementalCompact
             let blocks = blocks
                 result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1]
             end
+            result_domtree = copy(code.cfg.domtree)
+            rename_nodes!(result_domtree, bb_rename)
         else
             bb_rename = Vector{Int}()
             result_bbs = code.cfg.blocks
+            result_domtree = code.cfg.domtree
         end
         ssa_rename = Any[SSAValue(i) for i = 1:new_len]
         late_fixup = Vector{Int}()
         new_new_nodes = NewNodeStream()
         pending_nodes = NewNodeStream()
         pending_perm = Int[]
-        return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1,
-            new_new_nodes, pending_nodes, pending_perm,
-            1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms)
+        return new(code, result, result_bbs, result_domtree, ssa_rename,
+                   bb_rename, bb_rename, used_ssas, late_fixup, perm, 1,
+                   new_new_nodes, pending_nodes, pending_perm, 1, 1, 1, false,
+                   allow_cfg_transforms, allow_cfg_transforms)
     end
 
     # For inlining
@@ -616,11 +617,11 @@ mutable struct IncrementalCompact
         new_new_nodes = NewNodeStream()
         pending_nodes = NewNodeStream()
         pending_perm = Int[]
-        return new(code, parent.result,
-            parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas,
-            late_fixup, perm, 1,
-            new_new_nodes, pending_nodes, pending_perm,
-            1, result_offset, parent.active_result_bb, false, false, false)
+        return new(code, parent.result, parent.result_bbs,
+            parent.result_domtree, ssa_rename, bb_rename, bb_rename,
+            parent.used_ssas, late_fixup, perm, 1, new_new_nodes,
+            pending_nodes, pending_perm, 1, result_offset,
+            parent.active_result_bb, false, false, false)
     end
 end
 
@@ -892,63 +893,74 @@ function kill_edge!(bbs::Vector{BasicBlock}, from::Int, to::Int)
     preds, succs = bbs[to].preds, bbs[from].succs
     deleteat!(preds, findfirst(x->x === from, preds)::Int)
     deleteat!(succs, findfirst(x->x === to, succs)::Int)
-    if length(preds) == 0
-        for succ in copy(bbs[to].succs)
-            kill_edge!(bbs, to, succ)
-        end
-    end
+    nothing
 end
 
 # N.B.: from and to are non-renamed indices
 function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::Int)
-    # Note: We recursively kill as many edges as are obviously dead. However, this
-    # may leave dead loops in the IR. We kill these later in a CFG cleanup pass (or
-    # worstcase during codegen).
-    preds = compact.result_bbs[compact.bb_rename_succ[to]].preds
-    succs = compact.result_bbs[compact.bb_rename_pred[from]].succs
-    deleteat!(preds, findfirst(x->x === compact.bb_rename_pred[from], preds)::Int)
-    deleteat!(succs, findfirst(x->x === compact.bb_rename_succ[to], succs)::Int)
-    # Check if the block is now dead
-    if length(preds) == 0
-        for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs)
-            kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred))
+    renamed_from = compact.bb_rename_pred[from]
+    renamed_to = compact.bb_rename_succ[to]
+
+    preds = compact.result_bbs[renamed_to].preds
+    succs = compact.result_bbs[renamed_from].succs
+
+    deleteat!(preds, findfirst(x->x === renamed_from, preds)::Int)
+    deleteat!(succs, findfirst(x->x === renamed_to, succs)::Int)
+
+    domtree_delete_edge!(compact.result_domtree,
+                         compact.result_bbs,
+                         renamed_from,
+                         renamed_to)
+
+    # Recursively kill edges to dead blocks. This is not necessary for removing
+    # dead blocks with a subsequent `IncrementalCompact`, but killing all the
+    # statements in dead blocks allows passes not to have to worry about
+    # whether statements have become invalid by becoming dead.
+    if bb_unreachable(compact.result_domtree, renamed_to)
+        for succ in copy(compact.result_bbs[renamed_to].succs)
+            # Make sure edge hasn't already been killed in a previous iteration
+            # of this loop
+            if succ in compact.result_bbs[renamed_to].succs
+                kill_edge!(compact, active_bb, to,
+                           findfirst(x->x === succ, compact.bb_rename_pred))
+            end
         end
         if to < active_bb
             # Kill all statements in the block
-            stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts
+            stmts = compact.result_bbs[renamed_to].stmts
             for stmt in stmts
                 compact.result[stmt][:inst] = nothing
             end
             compact.result[last(stmts)][:inst] = ReturnNode()
         end
-    else
-        # Remove this edge from all phi nodes in `to` block
-        # NOTE: It is possible for `to` to contain only `nothing` statements,
-        #       so we must be careful to stop at its last statement
-        if to < active_bb
-            stmts = compact.result_bbs[compact.bb_rename_succ[to]].stmts
-            idx = first(stmts)
-            while idx <= last(stmts)
-                stmt = compact.result[idx][:inst]
-                stmt === nothing && continue
-                isa(stmt, PhiNode) || break
-                i = findfirst(x-> x === compact.bb_rename_pred[from], stmt.edges)
-                if i !== nothing
-                    deleteat!(stmt.edges, i)
-                    deleteat!(stmt.values, i)
-                end
-                idx += 1
+
+        # TODO: kill statements in blocks past `active_bb`
+    end
+
+    # Remove this edge from any phi nodes
+    if to < active_bb
+        stmts = compact.result_bbs[renamed_to].stmts
+        idx = first(stmts)
+        while idx <= last(stmts)
+            stmt = compact.result[idx][:inst]
+            stmt === nothing && continue
+            isa(stmt, PhiNode) || break
+            i = findfirst(x-> x === renamed_from, stmt.edges)
+            if i !== nothing
+                deleteat!(stmt.edges, i)
+                deleteat!(stmt.values, i)
             end
-        else
-            stmts = compact.ir.cfg.blocks[to].stmts
-            for stmt in CompactPeekIterator(compact, first(stmts), last(stmts))
-                stmt === nothing && continue
-                isa(stmt, PhiNode) || break
-                i = findfirst(x-> x === from, stmt.edges)
-                if i !== nothing
-                    deleteat!(stmt.edges, i)
-                    deleteat!(stmt.values, i)
-                end
+            idx += 1
+        end
+    else
+        stmts = compact.ir.cfg.blocks[to].stmts
+        for stmt in CompactPeekIterator(compact, first(stmts), last(stmts))
+            stmt === nothing && continue
+            isa(stmt, PhiNode) || break
+            i = findfirst(x-> x === from, stmt.edges)
+            if i !== nothing
+                deleteat!(stmt.edges, i)
+                deleteat!(stmt.values, i)
             end
         end
     end
@@ -1401,7 +1413,9 @@ end
 
 function complete(compact::IncrementalCompact)
     result_bbs = resize!(compact.result_bbs, compact.active_result_bb-1)
-    cfg = CFG(result_bbs, Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)])
+    cfg = CFG(result_bbs,
+              Int[first(result_bbs[i].stmts) for i in 2:length(result_bbs)],
+              compact.result_domtree)
     return IRCode(compact.ir, compact.result, cfg, compact.new_new_nodes)
 end
 
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index b0fad48c57ec7..9276238e640e5 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -385,7 +385,6 @@ function domsort_ssa!(ir::IRCode)
         push!(result_order, node)
         cs = ir.cfg.domtree.nodes[node].children
         terminator = ir.stmts[last(ir.cfg.blocks[node].stmts)][:inst]
-        iscondbr = isa(terminator, GotoIfNot)
         let old_node = node + 1
             if length(cs) >= 1
                 # Adding the nodes in reverse sorted order attempts to retain
@@ -499,7 +498,17 @@ function domsort_ssa!(ir::IRCode)
     for i in 1:length(result)
         result[i][:inst] = renumber_ssa!(result[i][:inst], inst_rename, true)
     end
-    cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]])
+    # Recompute domtree from scratch because adding the new nodes for retaining
+    # fallthrough can be a significant change to the CFG, and recomputing the
+    # whole thing is probably faster than making many updates. If not for these
+    # new nodes, we could use
+    #
+    # domtree = rename_blocks!(
+    #     ir.cfg.domtree,
+    #     Int[haskey(bb_rename, old_bb) ? bb_rename[old_bb] : -1
+    #         for old_bb in 1:length(ir.cfg.blocks)])
+    domtree = construct_domtree(new_bbs)
+    cfg = CFG(new_bbs, Int[first(bb.stmts) for bb in new_bbs[2:end]], domtree)
     new_new_nodes = NewNodeStream(length(ir.new_nodes))
     for i = 1:length(ir.new_nodes)
         new_info = ir.new_nodes.info[i]

From e2e00b0f3258e1e0ad4ffaa98d8caf99a37a6d31 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Mon, 25 Nov 2019 13:25:08 -0500
Subject: [PATCH 12/16] Update domtree dynamically in `cfg_simplify`

---
 base/compiler/ssair/domtree.jl | 91 ++++++++++++++++++++++++++++++++--
 base/compiler/ssair/ir.jl      |  6 +--
 base/compiler/ssair/passes.jl  | 19 ++++---
 test/compiler/ssair.jl         | 14 +++---
 4 files changed, 112 insertions(+), 18 deletions(-)

diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index 610d852e02f95..86e72fba7982a 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -489,9 +489,9 @@ end
 Rename basic block numbers in a dominator tree, removing the block if it is
 renamed to -1.
 """
-function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber})
+function rename_blocks!(domtree::DomTree, rename_bb::Vector{BBNumber})
     # Rename DFS tree
-    rename_nodes!(domtree.dfs_tree, rename_bb)
+    rename_blocks!(domtree.dfs_tree, rename_bb)
 
     # `snca_state` is indexed by preorder number, so should be unchanged
 
@@ -519,7 +519,7 @@ end
 Rename basic block numbers in a DFS tree, removing the block if it is renamed
 to -1.
 """
-function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber})
+function rename_blocks!(D::DFSTree, rename_bb::Vector{BBNumber})
     n_blocks = length(D.to_pre)
     n_reachable_blocks = length(D.from_pre)
 
@@ -547,6 +547,91 @@ function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber})
     return D
 end
 
+"""
+Combine two blocks `from` and `to`. It is assumed that there is an edge from
+`from` to `to`, and that the sole successor of `from` is `to` and the sole
+predecessor of `to` is `from`. This does not remove the `to` node, which is
+then considered to have no edges.
+"""
+function combine_blocks!(domtree::DomTree, from::BBNumber, to::BBNumber)
+    orig_pre_of_to = domtree.dfs_tree.to_pre[to]
+
+    # Combine nodes in DFS tree
+    combine_blocks!(domtree.dfs_tree, from, to)
+
+    # Update `snca_state`
+    deleteat!(domtree.snca_state, orig_pre_of_to)
+    for pre in 1:length(domtree.snca_state)
+        # If semidominator was `to`, set it to `from` instead
+        if domtree.snca_state[pre].semi == orig_pre_of_to
+            label = domtree.snca_state[pre].label
+            domtree.snca_state[pre] = SNCAData(orig_pre_of_to - 1, label)
+        end
+        # Labels will be reset anyway, so no need to update them
+    end
+
+    # Update `idoms_bb`
+    for bb in 1:length(domtree.idoms_bb)
+        # If immediate dominator was `to`, set it to `from` instead
+        if domtree.idoms_bb[bb] == to
+            domtree.idoms_bb[bb] = from
+        end
+    end
+
+    # Update `nodes`
+    copy!(domtree.nodes[from].children, domtree.nodes[to].children)
+    domtree.nodes[to] = DomTreeNode()
+    # Recursively decrement level of the affected children
+    worklist = domtree.nodes[from].children
+    while !isempty(worklist)
+        node = pop!(worklist)
+        domtree.nodes[node] = DomTreeNode(domtree.nodes[node].level-1,
+                                          domtree.nodes[node].children)
+        foreach(child -> push!(worklist, child),
+                domtree.nodes[node].children)
+    end
+
+    return domtree
+end
+
+function combine_blocks!(D::DFSTree, from::BBNumber, to::BBNumber)
+    # Reassignment of preorder and postorder numbers depend on these
+    # conditions, which should hold if `from` has only one successor `to` and
+    # `to` has only one predecessor `from`.
+    @assert D.to_pre[from] + 1 == D.to_pre[to]
+    @assert D.to_post[from] == D.to_post[to] + 1
+
+    # Set preorder number of `to` to 0,
+    # decrement preorder numbers greater than original preorder number of `to`.
+    # Set postorder number of `to` to 0,
+    # decrement postorder numbers greater than original preorder number of `to`.
+
+    orig_pre_of_to = D.to_pre[to]
+    orig_post_of_to = D.to_post[to]
+
+    D.to_pre[to] = 0
+    D.to_post[to] = 0
+    for bb in 1:length(D.to_pre)
+        if D.to_pre[bb] > orig_pre_of_to
+            D.to_pre[bb] -= 1
+        end
+        if D.to_post[bb] > orig_post_of_to
+            D.to_post[bb] -= 1
+        end
+    end
+    deleteat!(D.from_pre, orig_pre_of_to)
+    deleteat!(D.from_post, orig_post_of_to)
+
+    deleteat!(D.to_parent_pre, orig_pre_of_to)
+    for pre in 1:length(D.to_parent_pre)
+        if D.to_parent_pre[pre] > orig_pre_of_to
+            D.to_parent_pre[pre] -= 1
+        end
+    end
+
+    return D
+end
+
 """
 Checks if bb1 dominates bb2.
 bb1 and bb2 are indexes into the CFG blocks.
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index f13958f22afdb..a01fddddd02f9 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -586,10 +586,10 @@ mutable struct IncrementalCompact
                 end
             end
             let blocks = blocks
-                result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks) if bb_rename[i] != -1]
+                result_bbs = BasicBlock[blocks[i] for i = 1:length(blocks)
+                                        if bb_rename[i] != -1]
             end
-            result_domtree = copy(code.cfg.domtree)
-            rename_nodes!(result_domtree, bb_rename)
+            result_domtree = rename_blocks!(copy(code.cfg.domtree), bb_rename)
         else
             bb_rename = Vector{Int}()
             result_bbs = code.cfg.blocks
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index da675f60b0b3e..a780e5f8f9271 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1032,6 +1032,9 @@ end
 
 function cfg_simplify!(ir::IRCode)
     bbs = ir.cfg.blocks
+    old_domtree = ir.cfg.domtree
+    new_domtree = copy(old_domtree)
+
     merge_into = zeros(Int, length(bbs))
     merged_succ = zeros(Int, length(bbs))
     function follow_merge_into(idx::Int)
@@ -1055,6 +1058,13 @@ function cfg_simplify!(ir::IRCode)
                 # Prevent cycles by making sure we don't end up back at `idx`
                 # by following what is to be merged into `succ`
                 if follow_merged_succ(succ) != idx
+                    # Update domtree, by combining the block that `idx` is
+                    # going to be merged into with the block that `succ` was
+                    # going to be merged into (before this merge)
+                    combine_blocks!(new_domtree,
+                                    follow_merge_into(idx),
+                                    follow_merge_into(succ))
+
                     merge_into[succ] = idx
                     merged_succ[idx] = succ
                 end
@@ -1066,12 +1076,8 @@ function cfg_simplify!(ir::IRCode)
     max_bb_num = 1
     bb_rename_succ = zeros(Int, length(bbs))
     for i = 1:length(bbs)
-        # Drop blocks that will be merged away
-        if merge_into[i] != 0
-            bb_rename_succ[i] = -1
-        end
-        # Drop blocks with no predecessors
-        if i != 1 && length(ir.cfg.blocks[i].preds) == 0
+        # Drop blocks that will be merged away or are unreachable
+        if merge_into[i] != 0 || bb_unreachable(old_domtree, i)
             bb_rename_succ[i] = -1
         end
 
@@ -1152,6 +1158,7 @@ function cfg_simplify!(ir::IRCode)
     compact.bb_rename_succ = bb_rename_succ
     compact.bb_rename_pred = bb_rename_pred
     compact.result_bbs = cresult_bbs
+    compact.result_domtree = rename_blocks!(new_domtree, bb_rename_succ)
     result_idx = 1
     for (idx, orig_bb) in enumerate(result_bbs)
         ms = orig_bb
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index 7f15f49045e48..cf381cdefae47 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -222,12 +222,14 @@ end
 # nodes
 let ci = make_ci([
         # Block 1
-        Core.Compiler.GotoNode(3),
-        # Block 2 (no predecessors)
-        Core.Compiler.ReturnNode(3),
-        # Block 3
-        Core.PhiNode(Any[1, 2], Any[100, 200]),
-        Core.Compiler.ReturnNode(Core.SSAValue(3))
+        Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 4),
+        # Block 2
+        Core.Compiler.GotoNode(4),
+        # Block 3 (no predecessors)
+        Core.Compiler.GotoNode(4),
+        # Block 4
+        Core.PhiNode(Any[1, 2, 3], Any[100, 200, 300]),
+        Core.Compiler.ReturnNode(Core.SSAValue(4))
     ])
     ir = Core.Compiler.inflate_ir(ci)
     ir = Core.Compiler.compact!(ir, true)

From 17a69997bccf7d91bd2cafe4d7edb17caf72b041 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Sun, 8 Dec 2019 01:23:42 -0500
Subject: [PATCH 13/16] Kill statements in blocks that are unreachable, add
 tests for killing edges

This change only affects statements that we have yet to encounter after killing
an edge, while iterating through `IncrementalCompact`. Statements in dead
blocks that come before the point at which the edge is killed are killed in
`kill_edge!`, when the edge is killed.
---
 base/compiler/ssair/ir.jl | 30 ++++++++++++++++-----
 test/compiler/ssair.jl    | 56 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index a01fddddd02f9..65748a591cbb8 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -3,6 +3,8 @@
 @inline isexpr(@nospecialize(stmt), head::Symbol) = isa(stmt, Expr) && stmt.head === head
 Core.PhiNode() = Core.PhiNode(Any[], Any[])
 
+isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode)
+
 """
 Like UnitRange{Int}, but can handle the `last` field, being temporarily
 < first (this can happen during compacting)
@@ -925,16 +927,21 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
                            findfirst(x->x === succ, compact.bb_rename_pred))
             end
         end
-        if to < active_bb
-            # Kill all statements in the block
+
+        # Kill all statements in dead successor if it has already been
+        # processed. Also kill statements if dead successor is the active BB,
+        # in order to kill statements that have already been processed,
+        # although statements in the current BB that have not been processed
+        # will be killed in `process_node!`. Statements in dead blocks that
+        # have not yet been processed (`to` > `active_bb`) will be killed in
+        # `process_node!`.
+        if to <= active_bb
             stmts = compact.result_bbs[renamed_to].stmts
             for stmt in stmts
                 compact.result[stmt][:inst] = nothing
             end
             compact.result[last(stmts)][:inst] = ReturnNode()
         end
-
-        # TODO: kill statements in blocks past `active_bb`
     end
 
     # Remove this edge from any phi nodes
@@ -974,7 +981,19 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
     late_fixup = compact.late_fixup
     used_ssas = compact.used_ssas
     ssa_rename[idx] = SSAValue(result_idx)
-    if stmt === nothing
+    if compact.cfg_transforms_enabled &&
+            # Don't kill statements in blocks renamed to -1 because
+            # `cfg_simplify!` renames blocks that have been merged into others
+            # to -1, but the statements in them are not dead. Unreachable
+            # blocks detected in the constructor of `IncrementalCompact` are
+            # removed then, so we never encounter statements from them here.
+            compact.bb_rename_succ[active_bb] != -1 &&
+            bb_unreachable(compact.result_domtree,
+                           compact.bb_rename_succ[active_bb])
+        # Kill statements in dead blocks
+        result[result_idx][:inst] = isterminator(stmt) ? ReturnNode() : nothing
+        result_idx += 1
+    elseif stmt === nothing
         ssa_rename[idx] = stmt
     elseif isa(stmt, OldSSAValue)
         ssa_rename[idx] = ssa_rename[stmt.id]
@@ -1272,7 +1291,6 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=
     # result_idx is not, incremented, but that's ok and expected
     compact.result[old_result_idx] = compact.ir.stmts[idx]
     result_idx = process_node!(compact, old_result_idx, compact.ir.stmts[idx], idx, idx, active_bb, true)
-    stmt_if_any = old_result_idx == result_idx ? nothing : compact.result[old_result_idx][:inst]
     compact.result_idx = result_idx
     if idx == last(bb.stmts) && !attach_after_stmt_after(compact, idx)
         finish_current_bb!(compact, active_bb, old_result_idx)
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index cf381cdefae47..e04f4c89ca727 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -300,3 +300,59 @@ let cfg = CFG(BasicBlock[
     Compiler.cfg_insert_edge!(cfg, 1, 3)
     @test cfg.domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
 end
+
+# Make sure killing edges while iterating through an `IncrementalCompact` kills
+# all the statements in the dead blocks too.
+#
+# Block that becomes unreachable is before the active BB
+let ci = make_ci([
+        # Block 1
+        Core.Compiler.GotoNode(4),
+        # Block 2
+        Core.Compiler.PhiNode(Any[2, 4], Any[100, 200]),
+        Core.Compiler.GotoNode(2),
+        # Block 3
+        Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 2),
+        # Block 4
+        Core.Compiler.ReturnNode(0)
+    ])
+    ir = Core.Compiler.inflate_ir(ci)
+    compact = Core.Compiler.IncrementalCompact(ir, true)
+    Core.Compiler.foreach(x -> begin
+                              # Delete edge to block 2 when at statement 4
+                              if x.first == 4
+                                  Core.Compiler.kill_edge!(
+                                      compact, compact.active_result_bb, 3, 2)
+                                  compact.result[4] = Core.Compiler.GotoNode(4)
+                              end
+                          end, compact)
+    ir = Core.Compiler.finish(compact)
+    @test Core.Compiler.verify_ir(ir) === nothing
+    @test ir.stmts[2][:inst] === nothing
+end
+# Block that becomes unreachable is after the active BB
+let ci = make_ci([
+        # Block 1
+        Core.Compiler.GotoIfNot(Expr(:call, GlobalRef(Main, :something)), 3),
+        # Block 2
+        Core.Compiler.GotoNode(5),
+        # Block 3
+        Core.Compiler.PhiNode(Any[2, 3], Any[100, 200]),
+        Core.Compiler.GotoNode(3),
+        # Block 4
+        Core.Compiler.ReturnNode(0)
+    ])
+    ir = Core.Compiler.inflate_ir(ci)
+    compact = Core.Compiler.IncrementalCompact(ir, true)
+    Core.Compiler.foreach(x -> begin
+                              # Delete edge to block 3 when at statement 1
+                              if x.first == 1
+                                  Core.Compiler.kill_edge!(
+                                      compact, compact.active_result_bb, 1, 3)
+                                  compact.result[1] = Core.Compiler.GotoNode(2)
+                              end
+                          end, compact)
+    ir = Core.Compiler.finish(compact)
+    @test Core.Compiler.verify_ir(ir) === nothing
+    @test ir.stmts[3][:inst] === nothing
+end

From 0b3c63c5903f21e438e219d34285c3b7b793b836 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Mon, 25 Nov 2019 15:12:23 -0500
Subject: [PATCH 14/16] Fix infinite loop bug in `kill_edge!`

If a statement was `nothing`, `kill_edge!` would never move on from trying to
kill it because the index wasn't incremented.
---
 base/compiler/ssair/ir.jl | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index 65748a591cbb8..e2a6d7611bcc1 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -530,6 +530,9 @@ mutable struct IncrementalCompact
     result_domtree # TODO type
 
     ssa_rename::Vector{Any}
+    # `bb_rename_pred` is for renaming predecessors to the blocks they have
+    # been merged into (differs from `bb_rename_succ` only if blocks are
+    # combined), `bb_rename_succ` is how blocks are actually renamed
     bb_rename_pred::Vector{Int}
     bb_rename_succ::Vector{Int}
 
@@ -906,8 +909,8 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
     preds = compact.result_bbs[renamed_to].preds
     succs = compact.result_bbs[renamed_from].succs
 
-    deleteat!(preds, findfirst(x->x === renamed_from, preds)::Int)
-    deleteat!(succs, findfirst(x->x === renamed_to, succs)::Int)
+    deleteat!(preds, findfirst(x -> x === renamed_from, preds)::Int)
+    deleteat!(succs, findfirst(x -> x === renamed_to, succs)::Int)
 
     domtree_delete_edge!(compact.result_domtree,
                          compact.result_bbs,
@@ -924,7 +927,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
             # of this loop
             if succ in compact.result_bbs[renamed_to].succs
                 kill_edge!(compact, active_bb, to,
-                           findfirst(x->x === succ, compact.bb_rename_pred))
+                           findfirst(x -> x === succ, compact.bb_rename_pred))
             end
         end
 
@@ -950,9 +953,12 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
         idx = first(stmts)
         while idx <= last(stmts)
             stmt = compact.result[idx][:inst]
-            stmt === nothing && continue
+            if stmt === nothing
+                idx += 1
+                continue
+            end
             isa(stmt, PhiNode) || break
-            i = findfirst(x-> x === renamed_from, stmt.edges)
+            i = findfirst(x -> x === renamed_from, stmt.edges)
             if i !== nothing
                 deleteat!(stmt.edges, i)
                 deleteat!(stmt.values, i)
@@ -964,7 +970,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
         for stmt in CompactPeekIterator(compact, first(stmts), last(stmts))
             stmt === nothing && continue
             isa(stmt, PhiNode) || break
-            i = findfirst(x-> x === from, stmt.edges)
+            i = findfirst(x -> x === from, stmt.edges)
             if i !== nothing
                 deleteat!(stmt.edges, i)
                 deleteat!(stmt.values, i)

From ff20764ba164aceec6e248ea4d50188ab9895445 Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Sat, 14 Dec 2019 22:36:57 -0500
Subject: [PATCH 15/16] Move `BasicBlock` to separate source file

This is so we can add type declarations to fields in ir.jl that are domtrees,
by breaking the dependency loop between domtree.jl (uses basic blocks but
defines domtrees) and ir.jl (uses domtrees but defined basic blocks).
---
 base/compiler/ssair/basicblock.jl | 32 +++++++++++++++++++++++++++++++
 base/compiler/ssair/driver.jl     |  3 ++-
 base/compiler/ssair/ir.jl         | 31 ++----------------------------
 3 files changed, 36 insertions(+), 30 deletions(-)
 create mode 100644 base/compiler/ssair/basicblock.jl

diff --git a/base/compiler/ssair/basicblock.jl b/base/compiler/ssair/basicblock.jl
new file mode 100644
index 0000000000000..427aae707e664
--- /dev/null
+++ b/base/compiler/ssair/basicblock.jl
@@ -0,0 +1,32 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+Like UnitRange{Int}, but can handle the `last` field, being temporarily
+< first (this can happen during compacting)
+"""
+struct StmtRange <: AbstractUnitRange{Int}
+    start::Int
+    stop::Int
+end
+
+first(r::StmtRange) = r.start
+last(r::StmtRange) = r.stop
+iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1)
+
+StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range))
+
+struct BasicBlock
+    stmts::StmtRange
+    preds::Vector{Int}
+    succs::Vector{Int}
+end
+
+function BasicBlock(stmts::StmtRange)
+    return BasicBlock(stmts, Int[], Int[])
+end
+
+function BasicBlock(old_bb, stmts)
+    return BasicBlock(stmts, old_bb.preds, old_bb.succs)
+end
+
+copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs))
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index 5a1bfcb28a530..1f6470fed9046 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -10,8 +10,9 @@ else
     end
 end
 
-include("compiler/ssair/ir.jl")
+include("compiler/ssair/basicblock.jl")
 include("compiler/ssair/domtree.jl")
+include("compiler/ssair/ir.jl")
 include("compiler/ssair/slot2ssa.jl")
 include("compiler/ssair/queries.jl")
 include("compiler/ssair/passes.jl")
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index e2a6d7611bcc1..d34a854f8ed0c 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -5,38 +5,11 @@ Core.PhiNode() = Core.PhiNode(Any[], Any[])
 
 isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode)
 
-"""
-Like UnitRange{Int}, but can handle the `last` field, being temporarily
-< first (this can happen during compacting)
-"""
-struct StmtRange <: AbstractUnitRange{Int}
-    start::Int
-    stop::Int
-end
-first(r::StmtRange) = r.start
-last(r::StmtRange) = r.stop
-iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1)
-
-StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range))
-
-struct BasicBlock
-    stmts::StmtRange
-    preds::Vector{Int}
-    succs::Vector{Int}
-end
-function BasicBlock(stmts::StmtRange)
-    return BasicBlock(stmts, Int[], Int[])
-end
-function BasicBlock(old_bb, stmts)
-    return BasicBlock(stmts, old_bb.preds, old_bb.succs)
-end
-copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs))
-
 struct CFG
     blocks::Vector{BasicBlock}
     index::Vector{Int} # map from instruction => basic-block number
                        # TODO: make this O(1) instead of O(log(n_blocks))?
-    domtree # TODO type
+    domtree::DomTree
 end
 
 function CFG(blocks::Vector{BasicBlock}, index::Vector{Int})
@@ -527,7 +500,7 @@ mutable struct IncrementalCompact
     ir::IRCode
     result::InstructionStream
     result_bbs::Vector{BasicBlock}
-    result_domtree # TODO type
+    result_domtree::DomTree
 
     ssa_rename::Vector{Any}
     # `bb_rename_pred` is for renaming predecessors to the blocks they have

From 6a250c4249de25784bda9223192f4ec8bc897a7b Mon Sep 17 00:00:00 2001
From: Leon Shen <leon@yhls.org>
Date: Thu, 31 Oct 2019 12:21:14 -0400
Subject: [PATCH 16/16] Enable CFG transforms; turn DCE back on globally

---
 base/compiler/ssair/ir.jl | 4 ++--
 test/compiler/inline.jl   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index d34a854f8ed0c..4688a14a6fb8c 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -527,7 +527,7 @@ mutable struct IncrementalCompact
     cfg_transforms_enabled::Bool
     fold_constant_branches::Bool
 
-    function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=false)
+    function IncrementalCompact(code::IRCode, allow_cfg_transforms::Bool=true)
         # Sort by position with attach after nodes after regular ones
         perm = my_sortperm(Int[let new_node = code.new_nodes.info[i]
             (new_node.pos * 2 + Int(new_node.attach_after))
@@ -1416,7 +1416,7 @@ function complete(compact::IncrementalCompact)
     return IRCode(compact.ir, compact.result, cfg, compact.new_new_nodes)
 end
 
-function compact!(code::IRCode, allow_cfg_transforms::Bool=false)
+function compact!(code::IRCode, allow_cfg_transforms::Bool=true)
     compact = IncrementalCompact(code, allow_cfg_transforms)
     # Just run through the iterator without any processing
     for _ in compact; end # _ isa Pair{Int, Any}
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 85ac8fd6a0f4c..334bafc5c8685 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -161,7 +161,7 @@ function f_ifelse(x)
     return b ? x + 1 : x
 end
 # 2 for now because the compiler leaves a GotoNode around
-@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
+@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
 
 # Test that inlining of _apply properly hits the inference cache
 @noinline cprop_inline_foo1() = (1, 1)