Closed
Description
The GC is frequently segfaulting during the GC tests, as seen, for example, in the core dump here: https://buildkite.com/julialang/julia-master/builds/33633#018da2eb-0a9c-4bc8-b9af-7f5c1cd8fa99
The GC worker thread is seen trying to access GC state at a point where that no longer seems to be legal, resulting in an unpredictable segfault when the TSAN violation on the use of the `gc_all_tls_states` variable occurs in `gc_should_mark`. Note that the GC has already exited here on the master thread, which has resumed running code, while the worker thread is still trying to join marking:
(gdb) bt
#0 0xf764a804 in gc_should_mark () at /cache/build/builder-amdci4-6/julialang/julia-master/src/gc.c:3111
#1 0xf7653c6e in gc_should_mark () at /cache/build/builder-amdci4-6/julialang/julia-master/src/gc.c:3155
#2 gc_mark_loop_parallel (ptls=<optimized out>, ptls@entry=0xe5100610, master=<optimized out>, master@entry=0) at /cache/build/builder-amdci4-6/julialang/julia-master/src/gc.c:3148
#3 0xf7646f42 in jl_parallel_gc_threadfun (arg=0x97a0600) at /cache/build/builder-amdci4-6/julialang/julia-master/src/scheduler.c:147
#4 0xf7f7a0b4 in start_thread (arg=<optimized out>) at pthread_create.c:477
#5 0xf7e8a386 in clone () at ../sysdeps/unix/sysv/linux/i386/clone.S:108
(gdb) f 0
#0 0xf764a804 in gc_should_mark () at /cache/build/builder-amdci4-6/julialang/julia-master/src/gc.c:3111
3111 size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
(gdb) p jl_n_threads
$8 = 5
(gdb) p jl_n_threadpools
$9 = 2
(gdb) p jl_n_gcthreads
$10 = 1
(gdb) p jl_n_sweepthreads
$11 = 0
(gdb) p gc_all_tls_states
$16 = (jl_ptls_t *) 0x0
(gdb) p gc_n_threads_marking
$17 = 0
(gdb) p n_threads_marking
$18 = 1
(gdb) p gc_master_tid
$15 = -1
(gdb) p tid
$19 = 0
(gdb) p jl_options
$7 = {quiet = 0 '\000', banner = -1 '\377', julia_bindir = 0x96dff90 "/cache/build/tester-amdci5-10/julialang/julia-master/julia-d30ccd386d/bin",
julia_bin = 0x96aa110 "/cache/build/tester-amdci5-10/julialang/julia-master/julia-d30ccd386d/bin/julia", cmds = 0x0, image_file = 0x96e0fa0 "/cache/build/tester-amdci5-10/julialang/julia-master/julia-d30ccd386d/lib/julia/sys.so",
cpu_target = 0x96a7f70 "native", nthreadpools = 0 '\000', nthreads = 0, nmarkthreads = 0, nsweepthreads = 0 '\000', nthreads_per_pool = 0x0, nprocs = 0, machine_file = 0x0, project = 0x0, isinteractive = 0 '\000', color = 0 '\000',
historyfile = 1 '\001', startupfile = 2 '\002', compile_enabled = 1 '\001', code_coverage = 0 '\000', malloc_log = 0 '\000', tracked_path = 0x0, opt_level = 2 '\002', opt_level_min = 0 '\000', debug_level = 1 '\001',
check_bounds = 0 '\000', depwarn = 2 '\002', warn_overwrite = 0 '\000', can_inline = 1 '\001', polly = 1 '\001', trace_compile = 0x0, fast_math = 0 '\000', worker = 0 '\000', cookie = 0x0, handle_signals = 1 '\001',
use_sysimage_native_code = 1 '\001', use_compiled_modules = 1 '\001', use_pkgimages = 1 '\001', bindto = 0x0, outputbc = 0x0, outputunoptbc = 0x0, outputo = 0x0, outputasm = 0x0, outputji = 0x0, output_code_coverage = 0x0,
incremental = 0 '\000', image_file_specified = 1 '\001', warn_scope = 1 '\001', image_codegen = 0 '\000', rr_detach = 1 '\001', strip_metadata = 0 '\000', strip_ir = 0 '\000', permalloc_pkgimg = 0 '\000', heap_size_hint = 0}