diff --git a/base/timing.jl b/base/timing.jl index 45a27e3378977..2132657c864c7 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -59,6 +59,34 @@ end cumulative_compile_time_ns_before() = ccall(:jl_cumulative_compile_time_ns_before, UInt64, ()) cumulative_compile_time_ns_after() = ccall(:jl_cumulative_compile_time_ns_after, UInt64, ()) +""" + Base.track_compile_time_permanently() + +Permanently enable tracking of time spent in compilation by Julia. + +Julia has the ability to measure the amount of time spent during compilation (including +type-inference, optimization, llvm optimizaiton, codegen, etc). However, on some systems +(FreeBSD-based systems are the known problems), this measurment can be too expensive, so +it is not enabled by default. + +Calling this function will globally enable tracking the cumulative compilation time. + +You can fetch the current total cumulative time spent in compilation by calling: +- [`Base.cumulative_compile_time_ns_total()`](@ref) +""" +track_compile_time_permanently() = ccall(:jl_track_compile_time_permanently, UInt64, ()) +""" + Base.cumulative_compile_time_ns_total() + +The current total cumulative time Julia has spent in compilation, in nanoseconds. + +To enable this global measurement, you must call +[`Base.track_compile_time_permanently()`](@ref) after starting this julia process. +Otherwise, this function will only return the total time spent in compilation during calls +to [`Base.@time`](@ref). +""" +cumulative_compile_time_ns_total() = ccall(:jl_cumulative_compile_time_ns_total, UInt64, ()) + # total time spend in garbage collection, in nanoseconds gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ()) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 60b1903aaa802..5e28ef204c7e1 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -74,20 +74,45 @@ void jl_jit_globals(std::map &globals) } extern "C" JL_DLLEXPORT -uint64_t jl_cumulative_compile_time_ns_before() +void jl_track_compile_time_permanently() { - // Increment the flag to allow reentrant callers to `@time`. + jl_always_measure_compile_time = 1; jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1); +} + +extern "C" JL_DLLEXPORT +uint64_t jl_cumulative_compile_time_ns_before() +{ + if (!jl_always_measure_compile_time) { + // Increment the flag to allow reentrant callers to `@time`. + jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1); + } return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } + extern "C" JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns_after() { - // Decrement the flag when done measuring, allowing other callers to continue measuring. - jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1); + if (!jl_always_measure_compile_time) { + // Decrement the flag when done measuring, allowing other callers to continue measuring. + jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1); + } return jl_atomic_load_relaxed(&jl_cumulative_compile_time); } +extern "C" JL_DLLEXPORT +uint64_t jl_cumulative_compile_time_ns_total() +{ + return jl_atomic_load_relaxed(&jl_cumulative_compile_time); +} + +extern "C" JL_DLLEXPORT +uint64_t jl_cumulative_compile_time_ns_for_current_thread() +{ + return jl_cumulative_compile_time_per_thread[jl_threadid()]; +} + + // this generates llvm code for the lambda info // and adds the result to the jitlayers // (and the shadow module), diff --git a/src/julia_internal.h b/src/julia_internal.h index a7943e11a8067..99c25824c8885 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -154,9 +154,11 @@ static inline uint64_t cycleclock(void) #include "timing.h" -// Global *atomic* integers controlling *process-wide* measurement of compilation time. -extern uint8_t jl_measure_compile_time_enabled; -extern uint64_t jl_cumulative_compile_time; +// Global variables controlling *process-wide* measurement of compilation time. +extern uint8_t jl_always_measure_compile_time; +extern uint8_t jl_measure_compile_time_enabled; // *atomic* +extern uint64_t jl_cumulative_compile_time; // *atomic* +extern uint64_t *jl_cumulative_compile_time_per_thread; #ifdef _COMPILER_MICROSOFT_ # define jl_return_address() ((uintptr_t)_ReturnAddress()) diff --git a/src/task.c b/src/task.c index 88d4eac0863c9..ba39ce69b4b13 100644 --- a/src/task.c +++ b/src/task.c @@ -562,10 +562,14 @@ static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_M ptls->io_wait = 0; // @time needs its compile timer disabled on error, // and cannot use a try-finally as it would break scope for assignments - // We blindly disable compilation time tracking here, for all running Tasks, even though - // it may cause some incorrect measurements. This is a known bug, and is being tracked - // here: https://github.com/JuliaLang/julia/pull/39138 - jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0); + // Though if the user has requested to _always_ measure compile time, we don't need to + // enable or disable it. + if (!jl_always_measure_compile_time) { + // We blindly disable compilation time tracking here, for all running Tasks, even + // though it may cause some incorrect measurements. This is a known bug, and is + // being tracked here: https://github.com/JuliaLang/julia/pull/39138 + jl_atomic_store_relaxed(&jl_measure_compile_time_enabled, 0); + } JL_GC_PUSH1(&exception); jl_gc_unsafe_enter(ptls); if (exception) { diff --git a/src/threading.c b/src/threading.c index ffe53c07b45ee..0f663e14a0386 100644 --- a/src/threading.c +++ b/src/threading.c @@ -287,8 +287,10 @@ void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k) #endif jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; +uint8_t jl_always_measure_compile_time = 0; uint8_t jl_measure_compile_time_enabled = 0; uint64_t jl_cumulative_compile_time = 0; +uint64_t *jl_cumulative_compile_time_per_thread = NULL; // return calling thread's ID // Also update the suspended_threads list in signals-mach when changing the @@ -467,6 +469,7 @@ void jl_init_threading(void) } if (jl_n_threads <= 0) jl_n_threads = 1; + jl_cumulative_compile_time_per_thread = (uint64_t*)calloc(jl_n_threads, sizeof(*jl_cumulative_compile_time_per_thread)); #ifndef __clang_analyzer__ jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*)); #endif diff --git a/test/misc.jl b/test/misc.jl index 94c35c43ffaec..b8ba13001c0a2 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -264,6 +264,7 @@ end @test timev_macro_scope() == 1 before = Base.cumulative_compile_time_ns_before(); +@test Base.cumulative_compile_time_ns_total() >= before; # exercise concurrent calls to `@time` for reentrant compilation time measurement. t1 = @async @time begin @@ -279,6 +280,28 @@ end after = Base.cumulative_compile_time_ns_after(); @test after >= before; +@test Base.cumulative_compile_time_ns_total() >= after; + +@testset "cumulative_compile_time_ns_total()" begin + # Global compilation tracking is disabled, so total time doesn't increment + ct1 = Base.cumulative_compile_time_ns_total() + @eval module M ; f(x,y) = x+y ; end + @eval M.f(2,3) + ct2 = Base.cumulative_compile_time_ns_total() + @test ct2 == ct1 + + # !!! Note that this cannot be undone !!! + Base.track_compile_time_permanently() + + # With global compilation tracking *enabled*, total time does increment + ct1 = Base.cumulative_compile_time_ns_total() + @eval module M ; f(x,y) = x+y ; end + @eval M.f(2,3) + ct2 = Base.cumulative_compile_time_ns_total() + @test ct2 > ct1 +end + + # interactive utilities struct ambigconvert; end # inject a problematic `convert` method to ensure it still works