@@ -26,6 +26,10 @@ _Atomic(int) gc_n_threads_sweeping;
26
26
_Atomic(jl_gc_padded_page_stack_t * ) gc_allocd_scratch ;
27
27
// `tid` of mutator thread that triggered GC
28
28
_Atomic(int ) gc_master_tid ;
29
// counter for sharing work when sweeping stacks: parallel sweepers claim
// per-thread stack pools by decrementing this index; negative means done
_Atomic(int) gc_ptls_sweep_idx;
// counter for round robin of giving back stack pages to the OS: rotates
// across collections so each cycle releases a different thread's pages
_Atomic(int) gc_stack_free_idx;
29
33
// `tid` of first GC thread
30
34
int gc_first_tid ;
31
35
// Mutex/cond used to synchronize wakeup of GC threads on parallel marking
@@ -1525,6 +1529,44 @@ static void gc_sweep_other(jl_ptls_t ptls, int sweep_full) JL_NOTSAFEPOINT
1525
1529
gc_num .total_sweep_free_mallocd_memory_time += t_free_mallocd_memory_end - t_free_mallocd_memory_start ;
1526
1530
}
1527
1531
1532
+ // wake up all threads to sweep the stacks
1533
+ void gc_sweep_wake_all_stacks (jl_ptls_t ptls ) JL_NOTSAFEPOINT
1534
+ {
1535
+ uv_mutex_lock (& gc_threads_lock );
1536
+ int first = gc_first_parallel_collector_thread_id ();
1537
+ int last = gc_last_parallel_collector_thread_id ();
1538
+ for (int i = first ; i <= last ; i ++ ) {
1539
+ jl_ptls_t ptls2 = gc_all_tls_states [i ];
1540
+ gc_check_ptls_of_parallel_collector_thread (ptls2 );
1541
+ jl_atomic_fetch_add (& ptls2 -> gc_tls .gc_stack_sweep_requested , 1 );
1542
+ }
1543
+ uv_cond_broadcast (& gc_threads_cond );
1544
+ uv_mutex_unlock (& gc_threads_lock );
1545
+ return ;
1546
+ }
1547
+
1548
+ void gc_sweep_wait_for_all_stacks (void ) JL_NOTSAFEPOINT
1549
+ {
1550
+ while ((jl_atomic_load_acquire (& gc_ptls_sweep_idx )>= 0 ) || jl_atomic_load_acquire (& gc_n_threads_sweeping ) != 0 ) {
1551
+ jl_cpu_pause ();
1552
+ }
1553
+ }
1554
+
1555
+ void sweep_stack_pools (jl_ptls_t ptls ) JL_NOTSAFEPOINT
1556
+ {
1557
+ // initialize ptls index for parallel sweeping of stack pools
1558
+ assert (gc_n_threads );
1559
+ int stack_free_idx = jl_atomic_load_relaxed (& gc_stack_free_idx );
1560
+ if (stack_free_idx + 1 == gc_n_threads )
1561
+ jl_atomic_store_relaxed (& gc_stack_free_idx , 0 );
1562
+ else
1563
+ jl_atomic_store_relaxed (& gc_stack_free_idx , stack_free_idx + 1 );
1564
+ jl_atomic_store_release (& gc_ptls_sweep_idx , gc_n_threads - 1 ); // idx == gc_n_threads = release stacks to the OS so it's serial
1565
+ gc_sweep_wake_all_stacks (ptls );
1566
+ sweep_stack_pool_loop ();
1567
+ gc_sweep_wait_for_all_stacks ();
1568
+ }
1569
+
1528
1570
static void gc_pool_sync_nfree (jl_gc_pagemeta_t * pg , jl_taggedvalue_t * last ) JL_NOTSAFEPOINT
1529
1571
{
1530
1572
assert (pg -> fl_begin_offset != UINT16_MAX );
@@ -3604,7 +3646,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
3604
3646
#endif
3605
3647
current_sweep_full = sweep_full ;
3606
3648
sweep_weak_refs ();
3607
- sweep_stack_pools ();
3649
+ sweep_stack_pools (ptls );
3608
3650
gc_sweep_foreign_objs ();
3609
3651
gc_sweep_other (ptls , sweep_full );
3610
3652
gc_scrub ();
0 commit comments