Skip to content

Commit 54411f1

Browse files
committed
optimizer: cancel finalizer registration rather than inserting finalize
This seems to perform well. ```julia mutable struct AtomicCounter @atomic count::Int end const counter = AtomicCounter(0) function withfinalizer(x) xs = finalizer(Ref(x)) do obj Base.@assume_effects :nothrow :notaskstate @atomic counter.count += obj[] end println(devnull, xs[]) xs[] = xs[] println(devnull, xs[]) return xs[] end @benchmark withfinalizer(100) ``` > master ``` BenchmarkTools.Trial: 10000 samples with 867 evaluations. Range (min … max): 135.717 ns … 18.617 μs ┊ GC (min … max): 0.00% … 47.51% Time (median): 144.992 ns ┊ GC (median): 0.00% Time (mean ± σ): 192.949 ns ± 687.513 ns ┊ GC (mean ± σ): 11.57% ± 3.23% ▇ ▃█▅▂ ▁▁▂█▇▂████▇▆▅▄▄▄▅▄▄▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁ ▂ 136 ns Histogram: frequency by time 201 ns < Memory estimate: 208 bytes, allocs estimate: 7. ``` > this commit ``` BenchmarkTools.Trial: 10000 samples with 964 evaluations. Range (min … max): 83.506 ns … 3.408 μs ┊ GC (min … max): 0.00% … 96.85% Time (median): 88.780 ns ┊ GC (median): 0.00% Time (mean ± σ): 101.797 ns ± 137.067 ns ┊ GC (mean ± σ): 10.26% ± 7.31% ▄█▃▅ ▄▃████▅▅▆▅▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▁▁▂▁▂▂▁▂▂ ▃ 83.5 ns Histogram: frequency by time 145 ns < Memory estimate: 208 bytes, allocs estimate: 7. ```
1 parent fecbc3e commit 54411f1

File tree

12 files changed

+137
-36
lines changed

12 files changed

+137
-36
lines changed

base/compiler/abstractinterpretation.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2279,10 +2279,10 @@ function abstract_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any},
22792279
finalizer_argvec = Any[argtypes[2], argtypes[3]]
22802280
call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), StmtInfo(false), sv, #=max_methods=#1)::Future
22812281
return Future{CallMeta}(call, interp, sv) do call, interp, sv
2282-
return CallMeta(Nothing, Any, Effects(), FinalizerInfo(call.info, call.effects))
2282+
return CallMeta(Int, Any, Effects(), FinalizerInfo(call.info, call.effects))
22832283
end
22842284
end
2285-
return Future(CallMeta(Nothing, Any, Effects(), NoCallInfo()))
2285+
return Future(CallMeta(Int, Any, Effects(), NoCallInfo()))
22862286
end
22872287

22882288
function abstract_throw(interp::AbstractInterpreter, argtypes::Vector{Any}, ::AbsIntState)

base/compiler/ssair/passes.jl

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,11 +1626,9 @@ function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int,
16261626
insert_bb != 0 || return nothing # verify post-dominator of all uses exists
16271627

16281628
# Figure out the exact statement where we're going to inline the finalizer.
1629-
loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int
1630-
attach_after = insert_idx !== nothing
1631-
flag = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL
16321629
finalizer_stmt = ir[SSAValue(finalizer_idx)][:stmt]
16331630

1631+
current_task_ssa = nothing
16341632
if !OptimizationParams(inlining.interp).assume_fatal_throw
16351633
# Collect all reachable blocks between the finalizer registration and the
16361634
# insertion point
@@ -1655,15 +1653,25 @@ function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int,
16551653

16561654
# An exception may be thrown between the finalizer registration and the point
16571655
# where the object’s lifetime ends (`insert_idx`): In such cases, we can’t
1658-
# remove the finalizer registration, but we can still insert a `Core.finalize`
1659-
# call at `insert_idx` while leaving the registration intact.
1660-
newinst = add_flag(NewInstruction(Expr(:call, GlobalRef(Core, :finalize), finalizer_stmt.args[3]), Nothing), flag)
1661-
insert_node!(ir, loc, newinst, attach_after)
1662-
return nothing
1656+
# remove the finalizer registration, but we can still inline the finalizer
1657+
# with inserting `Core._cancel_finalizer` at the end.
1658+
# Here, prepare a reference to the current task object that should be passed to
1659+
# `Core._cancel_finalizer` and insert it into `Core.finalizer` so that the
1660+
# finalizer is added to the ptls of the current task.
1661+
current_task_stmt = Expr(:foreigncall, QuoteNode(:jl_get_current_task),
1662+
Core.Ref{Core.Task}, Core.svec(), 0, QuoteNode(:ccall))
1663+
newinst = NewInstruction(current_task_stmt, Core.Task)
1664+
current_task_ssa = insert_node!(ir, finalizer_idx, newinst)
1665+
push!(finalizer_stmt.args, current_task_ssa)
1666+
break
16631667
end
16641668
end
16651669

1666-
argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]]
1670+
loc = insert_idx === nothing ? first(ir.cfg.blocks[insert_bb].stmts) : insert_idx::Int
1671+
attach_after = insert_idx !== nothing
1672+
flag = info isa FinalizerInfo ? flags_for_effects(info.effects) : IR_FLAG_NULL
1673+
alloc_obj = finalizer_stmt.args[3]
1674+
argexprs = Any[finalizer_stmt.args[2], alloc_obj]
16671675
if length(finalizer_stmt.args) >= 4
16681676
inline = finalizer_stmt.args[4]
16691677
if inline === nothing
@@ -1681,8 +1689,16 @@ function try_resolve_finalizer!(ir::IRCode, alloc_idx::Int, finalizer_idx::Int,
16811689
newinst = add_flag(NewInstruction(Expr(:call, argexprs...), Nothing), flag)
16821690
insert_node!(ir, loc, newinst, attach_after)
16831691
end
1684-
# Erase the call to `finalizer`
1685-
ir[SSAValue(finalizer_idx)][:stmt] = nothing
1692+
cancel_registration = current_task_ssa !== nothing
1693+
if cancel_registration
1694+
lookup_idx_ssa = SSAValue(finalizer_idx)
1695+
finalize_call = Expr(:call, GlobalRef(Core, :_cancel_finalizer), alloc_obj, current_task_ssa, lookup_idx_ssa)
1696+
newinst = add_flag(NewInstruction(finalize_call, Nothing), flag)
1697+
insert_node!(ir, loc, newinst, attach_after)
1698+
else
1699+
# Erase the call to `finalizer`
1700+
ir[SSAValue(finalizer_idx)][:stmt] = nothing
1701+
end
16861702
return nothing
16871703
end
16881704

base/compiler/tfuncs.jl

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -742,8 +742,13 @@ add_tfunc(donotdelete, 0, INT_INF, @nospecs((𝕃::AbstractLattice, args...)->No
742742
end
743743
end
744744
add_tfunc(compilerbarrier, 2, 2, compilerbarrier_tfunc, 5)
745-
add_tfunc(Core.finalizer, 2, 4, @nospecs((𝕃::AbstractLattice, args...)->Nothing), 5)
746-
add_tfunc(Core.finalize, 1, 1, @nospecs((𝕃::AbstractLattice, o)->Nothing), 100)
745+
add_tfunc(Core.finalizer, 2, 5, @nospecs((𝕃::AbstractLattice, args...)->Int), 5)
746+
@nospecs function _cancel_finalizer_tfunc(𝕃::AbstractLattice, o, ct, lookup_idx)
747+
hasintersect(widenconst(ct), Task) || return Bottom
748+
hasintersect(widenconst(lookup_idx), Int) || return Bottom
749+
return Nothing
750+
end
751+
add_tfunc(Core._cancel_finalizer, 3, 3, _cancel_finalizer_tfunc, 5)
747752

748753
@nospecs function compilerbarrier_nothrow(setting, val)
749754
return isa(setting, Const) && contains_is((:type, :const, :conditional), setting.val)
@@ -2288,12 +2293,12 @@ function _builtin_nothrow(𝕃::AbstractLattice, @nospecialize(f::Builtin), argt
22882293
elseif f === donotdelete
22892294
return true
22902295
elseif f === Core.finalizer
2291-
2 <= na <= 4 || return false
2296+
2 <= na <= 5 || return false
22922297
# `Core.finalizer` does no error checking - that's done in Base.finalizer
22932298
return true
2294-
elseif f === Core.finalize
2295-
na == 2 || return false
2296-
return true # `Core.finalize` does no error checking
2299+
elseif f === Core._cancel_finalizer
2300+
na == 3 || return false
2301+
return argtypes[2] Task && argtypes[3] Int
22972302
elseif f === Core.compilerbarrier
22982303
na == 2 || return false
22992304
return compilerbarrier_nothrow(argtypes[1], nothing)

base/gcutils.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ end
9898
9999
Immediately run finalizers registered for object `x`.
100100
"""
101-
finalize(@nospecialize(o)) = Core.finalize(o)
101+
finalize(@nospecialize(o)) = (:jl_finalize_th, Cvoid, (Any, Any,), current_task(), o)
102102

103103
"""
104104
Base.GC

src/builtin_proto.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ DECLARE_BUILTIN(_apply_pure);
2626
DECLARE_BUILTIN(_call_in_world);
2727
DECLARE_BUILTIN(_call_in_world_total);
2828
DECLARE_BUILTIN(_call_latest);
29+
DECLARE_BUILTIN(_cancel_finalizer);
2930
DECLARE_BUILTIN(_compute_sparams);
3031
DECLARE_BUILTIN(_expr);
3132
DECLARE_BUILTIN(_svec_ref);
@@ -37,7 +38,6 @@ DECLARE_BUILTIN(compilerbarrier);
3738
DECLARE_BUILTIN(current_scope);
3839
DECLARE_BUILTIN(donotdelete);
3940
DECLARE_BUILTIN(fieldtype);
40-
DECLARE_BUILTIN(finalize);
4141
DECLARE_BUILTIN(finalizer);
4242
DECLARE_BUILTIN(getfield);
4343
DECLARE_BUILTIN(getglobal);

src/builtins.c

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2004,16 +2004,22 @@ JL_CALLABLE(jl_f_compilerbarrier)
20042004
JL_CALLABLE(jl_f_finalizer)
20052005
{
20062006
// NOTE the compiler may temporarily insert additional argument for the later inlining pass
2007-
JL_NARGS(finalizer, 2, 4);
2007+
JL_NARGS(finalizer, 2, 5);
2008+
if (nargs == 5 && jl_is_task(args[4]))
2009+
// There are cases where the compiler inserts `current_task` as the 5th argument,
2010+
// and in such cases, the finalizer is added to the `ptls` of that task.
2011+
// This task is later referenced by `_cancel_finalizer`.
2012+
return jl_box_long(jl_gc_add_finalizer_(((jl_task_t*)args[4])->ptls, args[1], args[0]));
20082013
jl_task_t *ct = jl_current_task;
2009-
jl_gc_add_finalizer_(ct->ptls, args[1], args[0]);
2010-
return jl_nothing;
2014+
return jl_box_long(jl_gc_add_finalizer_(ct->ptls, args[1], args[0]));
20112015
}
20122016

2013-
JL_CALLABLE(jl_f_finalize)
2017+
JL_CALLABLE(jl_f__cancel_finalizer)
20142018
{
2015-
JL_NARGS(finalize, 1, 1);
2016-
jl_finalize(args[0]);
2019+
JL_NARGS(_cancel_finalizer, 3, 3);
2020+
JL_TYPECHK(_cancel_finalizer, task, args[1])
2021+
JL_TYPECHK(_cancel_finalizer, long, args[2])
2022+
jl_cancel_finalizer((jl_value_t*)args[0], (jl_task_t*)args[1], jl_unbox_long(args[2]));
20172023
return jl_nothing;
20182024
}
20192025

@@ -2449,7 +2455,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
24492455
jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete);
24502456
jl_builtin_compilerbarrier = add_builtin_func("compilerbarrier", jl_f_compilerbarrier);
24512457
add_builtin_func("finalizer", jl_f_finalizer);
2452-
add_builtin_func("finalize", jl_f_finalize);
2458+
add_builtin_func("_cancel_finalizer", jl_f__cancel_finalizer);
24532459
add_builtin_func("_compute_sparams", jl_f__compute_sparams);
24542460
add_builtin_func("_svec_ref", jl_f__svec_ref);
24552461
jl_builtin_current_scope = add_builtin_func("current_scope", jl_f_current_scope);

src/codegen.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1646,7 +1646,7 @@ static const auto &builtin_func_map() {
16461646
{ jl_f_donotdelete_addr, new JuliaFunction<>{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} },
16471647
{ jl_f_compilerbarrier_addr, new JuliaFunction<>{XSTR(jl_f_compilerbarrier), get_func_sig, get_func_attrs} },
16481648
{ jl_f_finalizer_addr, new JuliaFunction<>{XSTR(jl_f_finalizer), get_func_sig, get_func_attrs} },
1649-
{ jl_f_finalize_addr, new JuliaFunction<>{XSTR(jl_f_finalize), get_func_sig, get_func_attrs} },
1649+
{ jl_f__cancel_finalizer_addr, new JuliaFunction<>{XSTR(jl_f__cancel_finalizer), get_func_sig, get_func_attrs} },
16501650
{ jl_f__svec_ref_addr, new JuliaFunction<>{XSTR(jl_f__svec_ref), get_func_sig, get_func_attrs} },
16511651
{ jl_f_current_scope_addr, new JuliaFunction<>{XSTR(jl_f_current_scope), get_func_sig, get_func_attrs} },
16521652
};

src/gc-common.c

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ void run_finalizer(jl_task_t *ct, void *o, void *ff)
158158
// if `need_sync` is true, the `list` is the `finalizers` list of another
159159
// thread and we need additional synchronizations
160160
static void finalize_object(arraylist_t *list, jl_value_t *o,
161-
arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
161+
arraylist_t *copied_list, int need_sync) JL_NOTSAFEPOINT
162162
{
163163
// The acquire load makes sure that the first `len` objects are valid.
164164
// If `need_sync` is true, all mutations of the content should be limited
@@ -387,7 +387,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct)
387387
run_finalizers(ct, 1);
388388
}
389389

390-
void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
390+
size_t jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
391391
{
392392
assert(jl_atomic_load_relaxed(&ptls->gc_state) == JL_GC_STATE_UNSAFE);
393393
arraylist_t *a = &ptls->finalizers;
@@ -413,6 +413,7 @@ void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
413413
items[oldlen] = v;
414414
items[oldlen + 1] = f;
415415
jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
416+
return oldlen;
416417
}
417418

418419
JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
@@ -459,8 +460,9 @@ JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
459460
gc_all_tls_states = jl_atomic_load_relaxed(&jl_all_tls_states);
460461
for (int i = 0; i < gc_n_threads; i++) {
461462
jl_ptls_t ptls2 = gc_all_tls_states[i];
462-
if (ptls2 != NULL)
463+
if (ptls2 != NULL) {
463464
finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
465+
}
464466
}
465467
finalize_object(&finalizer_list_marked, o, &copied_list, 0);
466468
if (copied_list.len > 0) {
@@ -478,6 +480,43 @@ JL_DLLEXPORT void jl_finalize(jl_value_t *o)
478480
jl_finalize_th(jl_current_task, o);
479481
}
480482

483+
// Remove the finalizer for `o` from `ct->ptls->finalizers` and cancel that finalizer.
484+
// `lookup_idx` is the index at which the finalizer was registered in `ct->ptls->finalizers`
485+
// by `jl_gc_add_finalizer_` (used for the fast path).
486+
// Note that it is assumed that only a single finalizer exists for `o`.
487+
void jl_cancel_finalizer(jl_value_t *o, jl_task_t *ct, int lookup_idx)
488+
{
489+
arraylist_t *list = &ct->ptls->finalizers;
490+
size_t len = list->len;
491+
void **items = list->items;
492+
if (0 <= lookup_idx && lookup_idx < len) {
493+
void *v = items[lookup_idx];
494+
if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
495+
for (size_t i = lookup_idx + 2; i < len; i += 2) {
496+
items[i-2] = items[i];
497+
items[i-1] = items[i+1];
498+
}
499+
list->len = len - 2;
500+
return;
501+
}
502+
}
503+
// slow path for erasing the finalizer
504+
int finalizer_erased = 0;
505+
for (size_t i = 0; i < len; i += 2) {
506+
void *v = items[i];
507+
if (o == (jl_value_t*)gc_ptr_clear_tag(v, 1)) {
508+
for (size_t j = i + 2; j < len; j += 2) {
509+
items[j-2] = items[j];
510+
items[j-1] = items[j+1];
511+
}
512+
list->len = len - 2;
513+
finalizer_erased = 1;
514+
break;
515+
}
516+
}
517+
assert(finalizer_erased);
518+
}
519+
481520
// =========================================================================== //
482521
// Threading
483522
// =========================================================================== //

src/julia.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2326,6 +2326,8 @@ JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e, jl_task_t *ct);
23262326
JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
23272327
#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))
23282328

2329+
void jl_cancel_finalizer(jl_value_t *o, jl_task_t *ct, int lookup_idx);
2330+
23292331
extern JL_DLLIMPORT int jl_task_gcstack_offset;
23302332
extern JL_DLLIMPORT int jl_task_ptls_offset;
23312333

src/julia_internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
607607
void jl_gc_count_freed(size_t sz) JL_NOTSAFEPOINT;
608608
void jl_gc_run_all_finalizers(jl_task_t *ct);
609609
void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
610-
void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;
610+
size_t jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT;
611611

612612
void jl_gc_debug_print_status(void) JL_NOTSAFEPOINT;
613613
JL_DLLEXPORT void jl_gc_debug_critical_error(void) JL_NOTSAFEPOINT;

0 commit comments

Comments
 (0)