Commit c3155d6

gc scheduler synchronization fixes to 1.10 (JuliaLang#53661) (#139)
Cherry-pick the parts of JuliaLang#53355 which are relevant to the 1.10 GC.
1 parent: ba5345f

File tree: 1 file changed, +23 −37 lines
src/gc.c

Lines changed: 23 additions & 37 deletions
@@ -2904,9 +2904,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid == -1) {
-        return;
-    }
+    assert(master_tid != -1);
     mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
@@ -2997,7 +2995,7 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  * Correctness argument for the mark-loop termination protocol.
  *
  * Safety properties:
- * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * - No work items shall be in any thread's queues when `gc_should_mark` observes
  *   that `gc_n_threads_marking` is zero.
  *
  * - No work item shall be stolen from the master thread (i.e. mutator thread which started
@@ -3008,50 +3006,38 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  *   and that no work is stolen from us at that point.
  *
  * Proof:
- * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
- *   `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
- *   Since threads try to steal from all threads' queues, this implies that all threads must
- *   have tried to steal from the queue which still has a work item left, but failed to do so,
- *   which violates the semantics of Chase-Lev's work-stealing queue.
- *
- * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
- *   "master thread observes that `gc_n_threads_marking` is zero". Since we're using
- *   sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
- *   `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
- *   increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
- *   event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
- *   `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
- *   the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
- *   and therefore won't enter the mark-loop.
+ * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, then
+ *   no thread has work in its queue: a thread may only exit `gc_mark_and_steal` once its own
+ *   queue is empty, and that fact is synchronized to any thread in `gc_should_mark` by the
+ *   seq-cst fetch_add. `gc_queue_observer_lock` guarantees that once `gc_n_threads_marking`
+ *   reaches zero, no thread will increment it again, because incrementing is only legal
+ *   from inside the lock. Therefore, no thread will reenter the mark-loop after
+ *   `gc_n_threads_marking` reaches zero.
  */

-int gc_should_mark(jl_ptls_t ptls)
+int gc_should_mark(void)
 {
     int should_mark = 0;
-    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-    // fast path
-    if (n_threads_marking == 0) {
-        return 0;
-    }
     uv_mutex_lock(&gc_queue_observer_lock);
     while (1) {
-        int tid = jl_atomic_load(&gc_master_tid);
-        // fast path
-        if (tid == -1) {
-            break;
-        }
-        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-        // fast path
+        int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
         if (n_threads_marking == 0) {
             break;
         }
+        int tid = jl_atomic_load_relaxed(&gc_master_tid);
+        assert(tid != -1);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
         for (tid = gc_first_tid; tid < gc_first_tid + jl_n_gcthreads; tid++) {
-            work += gc_count_work_in_queue(gc_all_tls_states[tid]);
+            jl_ptls_t ptls2 = gc_all_tls_states[tid];
+            if (ptls2 == NULL) {
+                continue;
+            }
+            work += gc_count_work_in_queue(ptls2);
         }
         // if there is a lot of work left, enter the mark loop
         if (work >= 16 * n_threads_marking) {
-            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread
+                                                           // that found lots of work to increment this
             should_mark = 1;
             break;
         }
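
To make the termination argument concrete, here is a minimal standalone sketch (not Julia source) of the pattern the rewritten proof relies on: a marking count that may only be incremented while an observer lock is held, so that zero, once observed under the lock, is final. The names `observer_lock`, `n_marking`, and `work_left` are hypothetical stand-ins for `gc_queue_observer_lock`, `gc_n_threads_marking`, and the queue totals returned by `gc_count_work_in_queue`.

    // Standalone model of the lock-guarded termination counter.
    // Build with: cc -pthread model.c
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static pthread_mutex_t observer_lock = PTHREAD_MUTEX_INITIALIZER;
    static atomic_int n_marking;   // stand-in for gc_n_threads_marking
    static atomic_int work_left;   // stand-in for queued mark work

    // Mirrors gc_should_mark: incrementing n_marking is only legal
    // while holding observer_lock, so the first observation of zero
    // under the lock can never be overtaken by a late entrant.
    static int should_mark(void)
    {
        int should = 0;
        pthread_mutex_lock(&observer_lock);
        if (atomic_load(&n_marking) != 0 && atomic_load(&work_left) > 0) {
            atomic_fetch_add(&n_marking, 1);
            should = 1;
        }
        pthread_mutex_unlock(&observer_lock);
        return should;
    }

    static void *worker(void *arg)
    {
        (void)arg;
        while (should_mark()) {
            atomic_fetch_sub(&work_left, 1); // coarse stand-in for draining a queue
            atomic_fetch_sub(&n_marking, 1); // exit the "mark loop"
        }
        return NULL;
    }

    int main(void)
    {
        atomic_store(&work_left, 1000);
        // The master raises the count before waking workers, as the
        // reordered gc_mark_loop_parallel in the next hunk does.
        atomic_fetch_add(&n_marking, 1);
        pthread_t t[4];
        for (int i = 0; i < 4; i++)
            pthread_create(&t[i], NULL, worker, NULL);
        atomic_fetch_sub(&n_marking, 1); // master leaves the mark loop
        for (int i = 0; i < 4; i++)
            pthread_join(t[i], NULL);
        printf("settled: n_marking=%d\n", atomic_load(&n_marking));
        return 0;
    }

Every increment of `n_marking` is paired with a decrement and happens under the lock, so the model always settles with `n_marking` at zero, which is the invariant the new proof comment states.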
@@ -3063,22 +3049,22 @@ int gc_should_mark(jl_ptls_t ptls)

 void gc_wake_all_for_marking(jl_ptls_t ptls)
 {
-    jl_atomic_store(&gc_master_tid, ptls->tid);
     uv_mutex_lock(&gc_threads_lock);
-    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
     uv_cond_broadcast(&gc_threads_cond);
     uv_mutex_unlock(&gc_threads_lock);
 }

 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
     if (master) {
+        jl_atomic_store(&gc_master_tid, ptls->tid);
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
         gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
     while (1) {
-        int should_mark = gc_should_mark(ptls);
+        int should_mark = gc_should_mark();
         if (!should_mark) {
             break;
         }
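
This last hunk moves the scheduler-state writes out of `gc_wake_all_for_marking` and into `gc_mark_loop_parallel`, so `gc_master_tid` is published and `gc_n_threads_marking` is raised before any GC thread is woken, and the wake function is reduced to a pure broadcast. That ordering is also what lets the early return in `gc_mark_and_steal` harden into an assert in the first hunk. An annotated restatement of the new startup ordering (simplified; not the verbatim source):

    void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
    {
        if (master) {
            // Publish the master tid first: any thread that later
            // reaches gc_mark_and_steal passes assert(master_tid != -1).
            jl_atomic_store(&gc_master_tid, ptls->tid);
            // Raise the marking count before waking anyone, so a woken
            // thread polling gc_should_mark sees marking in progress
            // rather than a transient zero.
            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
            // Only now broadcast; gc_wake_all_for_marking no longer
            // writes any scheduler state of its own.
            gc_wake_all_for_marking(ptls);
            gc_mark_and_steal(ptls);
            jl_atomic_fetch_add(&gc_n_threads_marking, -1);
        }
        while (1) {
            int should_mark = gc_should_mark();
            if (!should_mark) {
                break;
            }
            // ... mark and decrement, as in the surrounding source ...
        }
    }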
