@@ -2904,9 +2904,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid == -1) {
-        return;
-    }
+    assert(master_tid != -1);
     mq_master = &gc_all_tls_states[master_tid]->mark_queue;
     void *new_obj;
     jl_gc_chunk_t c;
@@ -2997,7 +2995,7 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  * Correctness argument for the mark-loop termination protocol.
  *
  * Safety properties:
- * - No work items shall be in any thread's queues when `gc_mark_loop_barrier` observes
+ * - No work items shall be in any thread's queues when `gc_should_mark` observes
  *   that `gc_n_threads_marking` is zero.
  *
  * - No work item shall be stolen from the master thread (i.e. mutator thread which started
@@ -3008,50 +3006,38 @@ size_t gc_count_work_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
  *   and that no work is stolen from us at that point.
  *
  * Proof:
- * - Suppose the master thread observes that `gc_n_threads_marking` is zero in
- *   `gc_mark_loop_barrier` and there is a work item left in one thread's queue at that point.
- *   Since threads try to steal from all threads' queues, this implies that all threads must
- *   have tried to steal from the queue which still has a work item left, but failed to do so,
- *   which violates the semantics of Chase-Lev's work-stealing queue.
- *
- * - Let E1 be the event "master thread writes -1 to gc_master_tid" and E2 be the event
- *   "master thread observes that `gc_n_threads_marking` is zero". Since we're using
- *   sequentially consistent atomics, E1 => E2. Now suppose one thread which is spinning in
- *   `gc_should_mark` tries to enter the mark-loop after E2. In order to do so, it must
- *   increment `gc_n_threads_marking` to 1 in an event E3, and then read `gc_master_tid` in an
- *   event E4. Since we're using sequentially consistent atomics, E3 => E4. Since we observed
- *   `gc_n_threads_marking` as zero in E2, then E2 => E3, and we conclude E1 => E4, so that
- *   the thread which is spinning in `gc_should_mark` must observe that `gc_master_tid` is -1
- *   and therefore won't enter the mark-loop.
+ * - If a thread observes that `gc_n_threads_marking` is zero inside `gc_should_mark`, then no
+ *   thread has work left in its queue. This is guaranteed because a thread may only exit
+ *   `gc_mark_and_steal` once its own queue is empty, and that fact is synchronized to a thread
+ *   spinning in `gc_should_mark` by the seq-cst fetch_add. `gc_queue_observer_lock` guarantees
+ *   that once `gc_n_threads_marking` reaches zero, no thread will increment it again, because
+ *   incrementing is only legal from inside the lock. Therefore, no thread will re-enter the
+ *   mark-loop after `gc_n_threads_marking` reaches zero.
  */
 
-int gc_should_mark(jl_ptls_t ptls)
+int gc_should_mark(void)
 {
     int should_mark = 0;
-    int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-    // fast path
-    if (n_threads_marking == 0) {
-        return 0;
-    }
     uv_mutex_lock(&gc_queue_observer_lock);
     while (1) {
-        int tid = jl_atomic_load(&gc_master_tid);
-        // fast path
-        if (tid == -1) {
-            break;
-        }
-        n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
-        // fast path
+        int n_threads_marking = jl_atomic_load(&gc_n_threads_marking);
         if (n_threads_marking == 0) {
             break;
         }
+        int tid = jl_atomic_load_relaxed(&gc_master_tid);
+        assert(tid != -1);
         size_t work = gc_count_work_in_queue(gc_all_tls_states[tid]);
         for (tid = gc_first_tid; tid < gc_first_tid + jl_n_gcthreads; tid++) {
-            work += gc_count_work_in_queue(gc_all_tls_states[tid]);
+            jl_ptls_t ptls2 = gc_all_tls_states[tid];
+            if (ptls2 == NULL) {
+                continue;
+            }
+            work += gc_count_work_in_queue(ptls2);
         }
         // if there is a lot of work left, enter the mark loop
         if (work >= 16 * n_threads_marking) {
-            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1); // A possibility would be to allow a thread that found lots
+                                                           // of work to increment this
            should_mark = 1;
            break;
        }
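
To make the termination argument in the comment above easier to follow, here is a minimal, self-contained sketch of the protocol in portable C11 (pthreads and <stdatomic.h> rather than Julia's uv_mutex/jl_atomic wrappers; all toy_* names and the count_work callback are invented for illustration and are not part of this patch). It captures the two facts the proof relies on: a marker only decrements the counter after its own queue is drained, and after the master's initial increment, further increments happen only while holding the observer lock, so an observer that reads zero under the lock knows the count can never rise again.

/* Illustrative toy model of the mark-loop termination protocol.
 * Hypothetical names (toy_*); not the actual Julia GC code. */
#include <stdatomic.h>
#include <pthread.h>
#include <stddef.h>

static _Atomic int toy_n_threads_marking = 0;
static pthread_mutex_t toy_observer_lock = PTHREAD_MUTEX_INITIALIZER;

/* Master thread: make the counter non-zero *before* waking any helper,
 * mirroring the reordering in gc_mark_loop_parallel in the last hunk. */
static void toy_start_marking(void)
{
    atomic_fetch_add(&toy_n_threads_marking, 1); /* seq-cst by default */
}

/* A marker calls this only after its own queue is empty and stealing failed,
 * which is what lets an observer treat "counter == 0" as "no work anywhere". */
static void toy_stop_marking(void)
{
    atomic_fetch_add(&toy_n_threads_marking, -1);
}

/* Mirrors gc_should_mark: spin under the observer lock; re-entry increments
 * are only performed while holding the lock, so once zero is observed here,
 * no thread can slip back into the mark loop. */
static int toy_should_mark(size_t (*count_work)(void))
{
    int should_mark = 0;
    pthread_mutex_lock(&toy_observer_lock);
    while (1) {
        int n = atomic_load(&toy_n_threads_marking);
        if (n == 0)
            break;                    /* marking has terminated */
        size_t work = count_work();   /* re-count queued work each round */
        if (work >= 16 * (size_t)n) { /* enough work to justify another marker */
            atomic_fetch_add(&toy_n_threads_marking, 1);
            should_mark = 1;
            break;
        }
    }
    pthread_mutex_unlock(&toy_observer_lock);
    return should_mark;
}
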
@@ -3063,22 +3049,22 @@ int gc_should_mark(jl_ptls_t ptls)
 
 void gc_wake_all_for_marking(jl_ptls_t ptls)
 {
-    jl_atomic_store(&gc_master_tid, ptls->tid);
     uv_mutex_lock(&gc_threads_lock);
-    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
     uv_cond_broadcast(&gc_threads_cond);
     uv_mutex_unlock(&gc_threads_lock);
 }
 
 void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
 {
     if (master) {
+        jl_atomic_store(&gc_master_tid, ptls->tid);
+        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
         gc_wake_all_for_marking(ptls);
         gc_mark_and_steal(ptls);
         jl_atomic_fetch_add(&gc_n_threads_marking, -1);
     }
     while (1) {
-        int should_mark = gc_should_mark(ptls);
+        int should_mark = gc_should_mark();
         if (!should_mark) {
             break;
         }
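
As a side note on this last hunk: moving the gc_master_tid store and the first counter increment into gc_mark_loop_parallel, ahead of the wake-up, is what makes the new assert in gc_mark_and_steal and the relaxed load in gc_should_mark safe. A hedged sketch of that ordering, continuing the toy model above (toy_master_tid, toy_master_begin and toy_helper are again invented names):

/* Continues the toy model above; names are hypothetical. */
#include <assert.h>

static _Atomic int toy_master_tid = -1;

/* Master: publish the tid, then make the counter non-zero, then wake helpers.
 * The seq-cst fetch_add orders the tid store before any later observation of
 * a non-zero counter. */
static void toy_master_begin(int my_tid)
{
    atomic_store(&toy_master_tid, my_tid);
    toy_start_marking();
    /* ... broadcast a condition variable to wake the helper threads ... */
}

/* Helper: the non-zero counter was observed through a seq-cst load, so the
 * tid store above is already visible and even a relaxed load cannot return
 * -1; this is the same invariant asserted in gc_mark_and_steal. */
static void toy_helper(size_t (*count_work)(void))
{
    while (toy_should_mark(count_work)) {
        int tid = atomic_load_explicit(&toy_master_tid, memory_order_relaxed);
        assert(tid != -1);
        /* ... drain our own queue and steal from the other markers ... */
        toy_stop_marking();
    }
}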