Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ end

function Compiler.transform_result_for_cache(interp::SplitCacheInterp, result::Compiler.InferenceResult, edges::Compiler.SimpleVector)
opt = result.src::Compiler.OptimizationState
ir = opt.result.ir::Compiler.IRCode
ir = opt.optresult.ir::Compiler.IRCode
override = with_new_compiler
for inst in ir.stmts
stmt = inst[:stmt]
Expand Down
98 changes: 34 additions & 64 deletions Compiler/src/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,14 @@ function inline_cost_clamp(x::Int)
return convert(InlineCostType, x)
end

# Named values for the inlining flag byte reported by `jl_ir_flag_inlining`.
# They replace the bare `1` / `2` literals when testing whether a source was
# declared inline or noinline.
const SRC_FLAG_DECLARED_INLINE = 0x1
const SRC_FLAG_DECLARED_NOINLINE = 0x2

# Return `true` when the inlining flag stored on `src` (as reported by
# `jl_ir_flag_inlining`) equals `SRC_FLAG_DECLARED_INLINE`.
# Only the named-constant comparison is kept; the leftover `== 1` line made the
# second comparison a stray top-level statement referencing an undefined `src`.
is_declared_inline(@nospecialize src::MaybeCompressed) =
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_INLINE

# Return `true` when the inlining flag stored on `src` (as reported by
# `jl_ir_flag_inlining`) equals `SRC_FLAG_DECLARED_NOINLINE`.
# Only the named-constant comparison is kept; the leftover `== 2` line made the
# second comparison a stray top-level statement referencing an undefined `src`.
is_declared_noinline(@nospecialize src::MaybeCompressed) =
    ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_NOINLINE

#####################
# OptimizationState #
Expand Down Expand Up @@ -157,6 +160,7 @@ code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.wor

# Output of the optimizer for one method instance; held in
# `OptimizationState.optresult` until it is converted back to a `CodeInfo`.
mutable struct OptimizationResult
ir::IRCode
inline_flag::UInt8 # raw `jl_ir_flag_inlining` value of the source, captured by `finishopt!`
simplified::Bool # indicates whether the IR was processed with `cfg_simplify!`
end

Expand All @@ -168,7 +172,7 @@ end
mutable struct OptimizationState{Interp<:AbstractInterpreter}
linfo::MethodInstance
src::CodeInfo
result::Union{Nothing, OptimizationResult}
optresult::Union{Nothing, OptimizationResult}
stmt_info::Vector{CallInfo}
mod::Module
sptypes::Vector{VarState}
Expand Down Expand Up @@ -236,13 +240,29 @@ include("ssair/EscapeAnalysis.jl")
include("ssair/passes.jl")
include("ssair/irinterp.jl")

# Convert `opt` to a `CodeInfo`, attaching `edges` and the inlining cost derived
# from the inference `frame`.
function ir_to_codeinf!(opt::OptimizationState, frame::InferenceState, edges::SimpleVector)
    # The cost has to be computed before the conversion below, because the
    # conversion resets `opt.optresult` to `nothing`.
    cost = compute_inlining_cost(frame.interp, frame.result, opt.optresult)
    return ir_to_codeinf!(opt, edges, cost)
end

# Convert `opt` to a `CodeInfo` carrying `edges`, then record the supplied
# `inlining_cost` on it.
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector, inlining_cost::InlineCostType)
    codeinfo = ir_to_codeinf!(opt, edges)
    codeinfo.inlining_cost = inlining_cost
    return codeinfo
end

# Convert `opt` to a `CodeInfo` and store `edges` on the result.
function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector)
    codeinfo = ir_to_codeinf!(opt)
    codeinfo.edges = edges
    return codeinfo
end

function ir_to_codeinf!(opt::OptimizationState)
(; linfo, src, result) = opt
if result === nothing
(; linfo, src, optresult) = opt
if optresult === nothing
return src
end
src = ir_to_codeinf!(src, result.ir)
opt.result = nothing
src = ir_to_codeinf!(src, optresult.ir)
opt.optresult = nothing
opt.src = src
maybe_validate_code(linfo, src, "optimized")
return src
Expand Down Expand Up @@ -485,63 +505,12 @@ end
# Look up the type recorded for SSA value `s` in `src`.
function abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact})
    ssa_types = types(src)
    return ssa_types[s]
end

"""
finish(interp::AbstractInterpreter, opt::OptimizationState,
ir::IRCode, caller::InferenceResult)
finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)

Post-process information derived by Julia-level optimizations for later use.
In particular, this function determines the inlineability of the optimized code.
Called at the end of optimization to store the resulting IR back into the OptimizationState.
"""
function finish(interp::AbstractInterpreter, opt::OptimizationState,
ir::IRCode, caller::InferenceResult)
(; src, linfo) = opt
(; def, specTypes) = linfo

force_noinline = is_declared_noinline(src)

# compute inlining and other related optimizations
result = caller.result
@assert !(result isa LimitedAccuracy)
result = widenslotwrapper(result)

opt.result = OptimizationResult(ir, false)

# determine and cache inlineability
if !force_noinline
sig = unwrap_unionall(specTypes)
if !(isa(sig, DataType) && sig.name === Tuple.name)
force_noinline = true
end
if !is_declared_inline(src) && result === Bottom
force_noinline = true
end
end
if force_noinline
set_inlineable!(src, false)
elseif isa(def, Method)
if is_declared_inline(src) && isdispatchtuple(specTypes)
# obey @inline declaration if a dispatch barrier would not help
set_inlineable!(src, true)
else
# compute the cost (size) of inlining this code
params = OptimizationParams(interp)
cost_threshold = default = params.inline_cost_threshold
if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result))
cost_threshold += params.inline_tupleret_bonus
end
# if the method is declared as `@inline`, increase the cost threshold 20x
if is_declared_inline(src)
cost_threshold += 19*default
end
# a few functions get special treatment
if def.module === _topmod(def.module)
name = def.name
if name === :iterate || name === :unsafe_convert || name === :cconvert
cost_threshold += 4*default
end
end
src.inlining_cost = inline_cost(ir, params, cost_threshold)
end
end
# Store the optimized `ir` on `opt` as an `OptimizationResult`, together with the
# inlining flag read from `opt.src`. The result starts out as not simplified.
function finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode)
    inline_flag = ccall(:jl_ir_flag_inlining, UInt8, (Any,), opt.src)
    opt.optresult = OptimizationResult(ir, inline_flag, false)
    return nothing
end

Expand Down Expand Up @@ -1015,7 +984,8 @@ end
# Run the optimizer for `opt`: execute the IPO-safe pass pipeline, run the IPO
# dataflow analysis against `caller`, and store the resulting IR back into `opt`.
# Always returns `nothing`; the optimized IR is left in `opt.optresult`.
function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult)
    @timeit "optimizer" ir = run_passes_ipo_safe(opt.src, opt)
    ipo_dataflow_analysis!(interp, opt, ir, caller)
    # `finishopt!` is the single end-of-optimization hook. The stale
    # `return finish(...)` line returned before it and left this call unreachable.
    finishopt!(interp, opt, ir)
    return nothing
end

macro pass(name, expr)
Expand Down Expand Up @@ -1459,7 +1429,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod
return thiscost
end

function inline_cost(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
function inline_cost_model(ir::IRCode, params::OptimizationParams, cost_threshold::Int)
bodycost = 0
for i = 1:length(ir.stmts)
stmt = ir[SSAValue(i)][:stmt]
Expand Down
2 changes: 1 addition & 1 deletion Compiler/src/ssair/inlining.jl
Original file line number Diff line number Diff line change
Expand Up @@ -976,7 +976,7 @@ function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local
return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts))
end
function retrieve_ir_for_inlining(mi::MethodInstance, opt::OptimizationState, preserve_local_sources::Bool)
result = opt.result
result = opt.optresult
if result !== nothing
!result.simplified && simplify_ir!(result)
return retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources)
Expand Down
131 changes: 108 additions & 23 deletions Compiler/src/typeinfer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@ end
function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt, time_before::UInt64)
result = caller.result
#@assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges)
if isdefined(result, :ci)
if caller.cache_mode === CACHE_MODE_LOCAL
@assert !isdefined(result, :ci)
result.src = transform_result_for_local_cache(interp, result)
elseif isdefined(result, :ci)
edges = result_edges(interp, caller)
ci = result.ci
# if we aren't cached, we don't need this edge
Expand All @@ -115,11 +118,16 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
store_backedges(ci, edges)
end
inferred_result = nothing
uncompressed = inferred_result
uncompressed = result.src
const_flag = is_result_constabi_eligible(result)
debuginfo = get_debuginfo(result.src)
discard_src = caller.cache_mode === CACHE_MODE_NULL || const_flag
if !discard_src
Copy link
Member

@vtjnash vtjnash Apr 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the implementation of everything after here assumes that transform_result_for_cache returns a CodeInfo for correctness, and needs to be re-written

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like debuginfo may still be computed incorrectly in this part of the PR

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@serenity4 take a look?

Although independently it does now feel a bit weird from a dataflow perspective that the debuginfo that goes with the optimized source takes a different path.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would make more sense to me to take di = src.debuginfo (where src::CodeInfo is extracted from result.src::Union{CodeInfo, OptimizationState}) and override it with what we can extract from inferred_result (essentially doing the same logic as for result.src).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps something like this: serenity4@318b9a3

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've rebased this PR and cherry-picked your commit.

inferred_result = transform_result_for_cache(interp, result, edges)
if inferred_result !== nothing
uncompressed = inferred_result
debuginfo = get_debuginfo(inferred_result)
end
# TODO: do we want to augment edges here with any :invoke targets that we got from inlining (such that we didn't have a direct edge to it already)?
if inferred_result isa CodeInfo
result.src = inferred_result
Expand All @@ -128,27 +136,28 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
resize!(inferred_result.slottypes::Vector{Any}, nslots)
resize!(inferred_result.slotnames, nslots)
end
di = inferred_result.debuginfo
uncompressed = inferred_result
inferred_result = maybe_compress_codeinfo(interp, result.linfo, inferred_result)
result.is_src_volatile = false
elseif ci.owner === nothing
# The global cache can only handle objects that codegen understands
inferred_result = nothing
end
end
if !@isdefined di
di = DebugInfo(result.linfo)
if debuginfo === nothing
debuginfo = DebugInfo(result.linfo)
end
time_now = _time_ns()
time_self_ns = caller.time_self_ns + (time_now - time_before)
time_total = (time_now - caller.time_start - caller.time_paused) * 1e-9
ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects),
result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, di, edges)
result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, debuginfo, edges)
engine_reject(interp, ci)
codegen = codegen_cache(interp)
if !discard_src && codegen !== nothing && uncompressed isa CodeInfo
if !discard_src && codegen !== nothing && (isa(uncompressed, CodeInfo) || isa(uncompressed, OptimizationState))
if isa(uncompressed, OptimizationState)
uncompressed = ir_to_codeinf!(uncompressed, edges)
end
# record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work
codegen[ci] = uncompressed
if bootstrapping_compiler && inferred_result == nothing
Expand Down Expand Up @@ -299,36 +308,113 @@ function adjust_cycle_frame!(sv::InferenceState, cycle_valid_worlds::WorldRange,
return nothing
end

# Extract the debug info carried by `src`: directly from a `CodeInfo`, or from
# the `CodeInfo` wrapped by an `OptimizationState`. Any other input yields `nothing`.
function get_debuginfo(src)
    if isa(src, CodeInfo)
        return src.debuginfo
    elseif isa(src, OptimizationState)
        return src.src.debuginfo
    end
    return nothing
end

# A result is const-ABI eligible when it is a `Const`, its IPO effects are
# foldable and nothrow, and the constant value itself is inlineable.
function is_result_constabi_eligible(result::InferenceResult)
    rt = result.result
    isa(rt, Const) || return false
    is_foldable_nothrow(result.ipo_effects) || return false
    return is_inlineable_constant(rt.val)
end

function transform_result_for_cache(::AbstractInterpreter, result::InferenceResult, edges::SimpleVector)
# Compute the inlining cost for `result`. When `result.src` is not an
# `OptimizationState`, the cost is `MAX_INLINE_COST`.
function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult)
    opt = result.src
    if !isa(opt, OptimizationState)
        return MAX_INLINE_COST
    end
    return compute_inlining_cost(interp, result, opt.optresult)
end

# Compute the inlining cost for `result` from the flag and IR held by `optresult`.
function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult, optresult#=::OptimizationResult=#)
    (; inline_flag, ir) = optresult
    return inline_cost_model(interp, result, inline_flag, ir)
end

# Decide the inlining cost of the optimized `ir` belonging to `result`.
#
# Returns `MAX_INLINE_COST` when the code must not be inlined, `MIN_INLINE_COST`
# when a declared-inline request is honoured unconditionally, and otherwise the
# cost computed by the IR-level cost model under an adjusted threshold.
function inline_cost_model(interp::AbstractInterpreter, result::InferenceResult,
                           inline_flag::UInt8, ir::IRCode)
    # a noinline declaration settles the question immediately
    if inline_flag === SRC_FLAG_DECLARED_NOINLINE
        return MAX_INLINE_COST
    end

    (; def, specTypes) = result.linfo
    isa(def, Method) || return MAX_INLINE_COST

    wants_inline = inline_flag === SRC_FLAG_DECLARED_INLINE

    rettype = result.result
    @assert !(rettype isa LimitedAccuracy)
    rettype = widenslotwrapper(rettype)

    # only signatures that unwrap to a `Tuple` datatype are considered
    unwrapped = unwrap_unionall(specTypes)
    has_tuple_sig = isa(unwrapped, DataType) && unwrapped.name === Tuple.name
    has_tuple_sig || return MAX_INLINE_COST

    # code that returns `Bottom` is not inlined unless it was declared inline
    (wants_inline || rettype !== Bottom) || return MAX_INLINE_COST

    # honour the inline declaration when a dispatch barrier would not help
    if wants_inline && isdispatchtuple(specTypes)
        return MIN_INLINE_COST
    end

    # otherwise compute the cost (size) of inlining this code
    params = OptimizationParams(interp)
    base_threshold = params.inline_cost_threshold
    threshold = base_threshold
    if ⊑(optimizer_lattice(interp), rettype, Tuple) && !isconcretetype(widenconst(rettype))
        threshold += params.inline_tupleret_bonus
    end
    # a declared-inline method gets a 20x threshold in total
    if wants_inline
        threshold += 19*base_threshold
    end
    # a few functions defined in the top module get special treatment
    if def.module === _topmod(def.module)
        fname = def.name
        if fname === :iterate || fname === :unsafe_convert || fname === :cconvert
            threshold += 4*base_threshold
        end
    end
    return inline_cost_model(ir, params, threshold)
end

# Prepare `result.src` for the local cache.
#
# When the source is still an `OptimizationState`, it is kept as such; only the
# inlining cost is computed and recorded on the `CodeInfo` it wraps. Any other
# source is returned untouched.
function transform_result_for_local_cache(interp::AbstractInterpreter, result::InferenceResult)
    src = result.src
    if isa(src, OptimizationState)
        # Compute and store any information required to determine the inlineability of the callee.
        # Do not convert to `CodeInfo` here. Converting clears `optresult`, which
        # `compute_inlining_cost` reads, and would rebind `src` to a `CodeInfo`
        # that has no `src` field to store the cost on.
        opt = src
        opt.src.inlining_cost = compute_inlining_cost(interp, result)
    end
    return src
end

# Prepare `result.src` for the global cache.
#
# An `OptimizationState` source is either dropped (returns `nothing`) when
# `discard_optimized_result` says so, or converted to a `CodeInfo`. A `CodeInfo`
# gets `edges` and an inlining cost attached before being returned.
function transform_result_for_cache(interp::AbstractInterpreter, result::InferenceResult, edges::SimpleVector)
    src = result.src
    cost = nothing
    if isa(src, OptimizationState)
        optstate = src
        # the cost is taken before conversion, while `optresult` is still present
        cost = compute_inlining_cost(interp, result, optstate.optresult)
        if discard_optimized_result(interp, optstate, cost)
            return nothing
        end
        src = ir_to_codeinf!(optstate)
    end
    if isa(src, CodeInfo)
        src.edges = edges
        if cost === nothing
            cost = compute_inlining_cost(interp, result)
        end
        src.inlining_cost = cost
    end
    return src
end

# An optimized result is dropped only when the interpreter allows discarding
# trees and the code's inlining cost is `MAX_INLINE_COST`.
function discard_optimized_result(interp::AbstractInterpreter, opt#=::OptimizationState=#, inlining_cost#=::InlineCostType=#)
    if !may_discard_trees(interp)
        return false
    end
    return inlining_cost == MAX_INLINE_COST
end

# Prepare the inferred `ci` of `mi` for storage in the cache.
# Toplevel code (`mi.def` is not a `Method`) is returned uncompressed. Otherwise the
# result is the compressed IR, `ci` itself, or `nothing`, depending on the
# interpreter's `may_discard_trees` / `may_compress` settings and on `is_inlineable(ci)`.
function maybe_compress_codeinfo(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInfo)
def = mi.def
isa(def, Method) || return ci # don't compress toplevel code
can_discard_trees = may_discard_trees(interp)
# keep the tree unless discarding is allowed and the code is not inlineable
cache_the_tree = !can_discard_trees || is_inlineable(ci)
if cache_the_tree
if may_compress(interp)
return ccall(:jl_compress_ir, String, (Any, Any), def, ci)
else
return ci
end
else
return nothing
end
# NOTE(review): every branch above returns, so the two statements below are
# unreachable as written. They look like the post-change body of this diff hunk,
# with the pre-change lines above not removed. Confirm which version is intended.
may_compress(interp) && return ccall(:jl_compress_ir, String, (Any, Any), def, ci)
return ci
end

function cache_result!(interp::AbstractInterpreter, result::InferenceResult, ci::CodeInstance)
Expand Down Expand Up @@ -1101,8 +1187,7 @@ function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_opti
else
opt = OptimizationState(frame, interp)
optimize(interp, opt, frame.result)
src = ir_to_codeinf!(opt)
src.edges = Core.svec(opt.inlining.edges...)
src = ir_to_codeinf!(opt, frame, Core.svec(opt.inlining.edges...))
end
result.src = frame.src = src
end
Expand Down
1 change: 1 addition & 0 deletions Compiler/test/codegen.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

using Random
using InteractiveUtils
using InteractiveUtils: code_llvm, code_native
using Libdl
using Test

Expand Down
4 changes: 4 additions & 0 deletions Compiler/test/inline.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

module inline_tests

using Test
using Base.Meta
using Core: ReturnNode
Expand Down Expand Up @@ -2311,3 +2313,5 @@ g_noinline_invoke(x) = f_noinline_invoke(x)
# None of the statements in the typed code of `g_noinline_invoke` may be a `GlobalRef`.
let typed = code_typed1(g_noinline_invoke, (Union{Symbol,Nothing},))
    @test all(@nospecialize(stmt) -> !isa(stmt, GlobalRef), typed.code)
end

end # module inline_tests