From 3b61fad895e590e3ee42520cba1bc664e788f2c5 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Thu, 28 Dec 2023 22:00:47 +0800
Subject: [PATCH 01/48] WIP: Add ScheduleCollection when requesting GC

---
 src/mmtk.rs                 |  2 +-
 src/plan/gc_requester.rs    | 29 +++++++++++------------------
 src/scheduler/controller.rs | 16 +---------------
 src/scheduler/scheduler.rs  | 29 ++---------------------------
 4 files changed, 15 insertions(+), 61 deletions(-)

diff --git a/src/mmtk.rs b/src/mmtk.rs
index fa90270637..3d91f23ae8 100644
--- a/src/mmtk.rs
+++ b/src/mmtk.rs
@@ -148,7 +148,7 @@ impl<VM: VMBinding> MMTK<VM> {

         let state = Arc::new(GlobalState::default());

-        let gc_requester = Arc::new(GCRequester::new());
+        let gc_requester = Arc::new(GCRequester::new(scheduler.clone()));

         let gc_trigger = Arc::new(GCTrigger::new(
             options.clone(),
diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs
index 2e5f518cb2..0bae785202 100644
--- a/src/plan/gc_requester.rs
+++ b/src/plan/gc_requester.rs
@@ -1,7 +1,9 @@
+use crate::scheduler::gc_work::ScheduleCollection;
+use crate::scheduler::{GCWorkScheduler, WorkBucketStage};
 use crate::vm::VMBinding;
 use std::marker::PhantomData;
 use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::{Condvar, Mutex};
+use std::sync::{Arc, Mutex};

 struct RequestSync {
     request_count: isize,
@@ -12,27 +14,20 @@ struct RequestSync {
 /// and the GC coordinator thread waits for GC requests using this object.
 pub struct GCRequester<VM> {
     request_sync: Mutex<RequestSync>,
-    request_condvar: Condvar,
     request_flag: AtomicBool,
+    scheduler: Arc<GCWorkScheduler<VM>>,
     phantom: PhantomData<VM>,
 }

-// Clippy says we need this...
-impl<VM: VMBinding> Default for GCRequester<VM> {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
 impl<VM: VMBinding> GCRequester<VM> {
-    pub fn new() -> Self {
+    pub fn new(scheduler: Arc<GCWorkScheduler<VM>>) -> Self {
         GCRequester {
             request_sync: Mutex::new(RequestSync {
                 request_count: 0,
                 last_request_count: -1,
             }),
-            request_condvar: Condvar::new(),
             request_flag: AtomicBool::new(false),
+            scheduler,
             phantom: PhantomData,
         }
     }
@@ -46,7 +41,8 @@ impl<VM: VMBinding> GCRequester<VM> {
         if !self.request_flag.load(Ordering::Relaxed) {
             self.request_flag.store(true, Ordering::Relaxed);
             guard.request_count += 1;
-            self.request_condvar.notify_all();
+
+            self.schedule_collection();
         }
     }

@@ -56,11 +52,8 @@ impl<VM: VMBinding> GCRequester<VM> {
         drop(guard);
     }

-    pub fn wait_for_request(&self) {
-        let mut guard = self.request_sync.lock().unwrap();
-        guard.last_request_count += 1;
-        while guard.last_request_count == guard.request_count {
-            guard = self.request_condvar.wait(guard).unwrap();
-        }
+    fn schedule_collection(&self) {
+        // Add a ScheduleCollection work packet. It is the seed of other work packets.
+        self.scheduler.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
     }
 }
diff --git a/src/scheduler/controller.rs b/src/scheduler/controller.rs
index 3608f1ebfb..acc5426264 100644
--- a/src/scheduler/controller.rs
+++ b/src/scheduler/controller.rs
@@ -27,20 +27,6 @@ pub struct GCController<VM: VMBinding> {
 }

 impl<VM: VMBinding> GCController<VM> {
-    pub(crate) fn new(
-        mmtk: &'static MMTK<VM>,
-        requester: Arc<GCRequester<VM>>,
-        scheduler: Arc<GCWorkScheduler<VM>>,
-        coordinator_worker: GCWorker<VM>,
-    ) -> Box<GCController<VM>> {
-        Box::new(Self {
-            mmtk,
-            requester,
-            scheduler,
-            coordinator_worker,
-        })
-    }
-
     /// The main loop for the GC controller.
     pub fn run(&mut self, tls: VMWorkerThread) -> ! {
         probe!(mmtk, gccontroller_run);
@@ -50,7 +36,7 @@ impl<VM: VMBinding> GCController<VM> {
         loop {
             debug!("[STWController: Waiting for request...]");
-            self.requester.wait_for_request();
+            //self.requester.wait_for_request();
             debug!("[STWController: Request recieved.]");

             self.do_gc_until_completion_traced();
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 5c71296eca..46357e4a46 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -6,9 +6,8 @@ use crate::mmtk::MMTK;
 use crate::util::opaque_pointer::*;
 use crate::util::options::AffinityKind;
 use crate::util::rust_util::array_from_fn;
-use crate::vm::Collection;
-use crate::vm::{GCThreadContext, VMBinding};
-use crossbeam::deque::{self, Steal};
+use crate::vm::VMBinding;
+use crossbeam::deque::Steal;
 use enum_map::{Enum, EnumMap};
 use std::collections::HashMap;
 use std::sync::Arc;
@@ -18,8 +17,6 @@ pub struct GCWorkScheduler<VM: VMBinding> {
     pub work_buckets: EnumMap<WorkBucketStage, WorkBucket<VM>>,
     /// Workers
     pub(crate) worker_group: Arc<WorkerGroup<VM>>,
-    /// The shared part of the GC worker object of the controller thread
-    coordinator_worker_shared: Arc<GCWorkerShared<VM>>,
     /// Condition Variable for worker synchronization
     pub(crate) worker_monitor: Arc<WorkerMonitor>,
     /// How to assign the affinity of each GC thread. Specified by the user.
     affinity: AffinityKind,
@@ -70,7 +67,6 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
         Arc::new(Self {
             work_buckets,
             worker_group,
-            coordinator_worker_shared,
             worker_monitor,
             affinity,
         })
@@ -82,23 +78,6 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {

     /// Create GC threads, including the controller thread and all workers.
     pub fn spawn_gc_threads(self: &Arc<Self>, mmtk: &'static MMTK<VM>, tls: VMThread) {
-        // Spawn the controller thread.
-        let coordinator_worker = GCWorker::new(
-            mmtk,
-            usize::MAX,
-            self.clone(),
-            true,
-            self.coordinator_worker_shared.clone(),
-            deque::Worker::new_fifo(),
-        );
-        let gc_controller = GCController::new(
-            mmtk,
-            mmtk.gc_requester.clone(),
-            self.clone(),
-            coordinator_worker,
-        );
-        VM::VMCollection::spawn_gc_thread(tls, GCThreadContext::<VM>::Controller(gc_controller));
-
         self.worker_group.spawn(mmtk, tls)
     }

@@ -379,8 +358,6 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
             let worker_stat = worker.borrow_stat();
             worker_stat.enable();
         }
-        let coordinator_worker_stat = self.coordinator_worker_shared.borrow_stat();
-        coordinator_worker_stat.enable();
     }

     pub fn statistics(&self) -> HashMap<String, String> {
@@ -389,8 +366,6 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
             let worker_stat = worker.borrow_stat();
             summary.merge(&worker_stat);
         }
-        let coordinator_worker_stat = self.coordinator_worker_shared.borrow_stat();
-        summary.merge(&coordinator_worker_stat);
         summary.harness_stat()
     }

From 18f7bb610f9aac268518951f718c4b19122e12d4 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Thu, 28 Dec 2023 23:05:05 +0800
Subject: [PATCH 02/48] WIP: Rework GC triggering

---
 src/plan/gc_requester.rs | 39 ++++++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs
index 0bae785202..43f7c5a5ac 100644
--- a/src/plan/gc_requester.rs
+++ b/src/plan/gc_requester.rs
@@ -6,12 +6,11 @@ use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Mutex};

 struct RequestSync {
-    request_count: isize,
-    last_request_count: isize,
+    /// Is GC scheduled (but not finished)?
+    gc_scheduled: bool,
 }

-/// GC requester. This object allows other threads to request (trigger) GC,
-/// and the GC coordinator thread waits for GC requests using this object.
+/// This data structure lets mutators trigger GC, and may schedule collection when appropriate.
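+///
+/// Illustrative usage sketch (not part of this patch; `mmtk` and `tls` are assumed to be in
+/// scope). A mutator that fails to allocate can request a GC and then block for it:
+///
+/// ```ignore
+/// mmtk.gc_requester.request();         // idempotent while a request is already pending
+/// VM::VMCollection::block_for_gc(tls); // wait here until the requested GC finishes
+/// ```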
 pub struct GCRequester<VM> {
     request_sync: Mutex<RequestSync>,
     request_flag: AtomicBool,
@@ -23,8 +22,7 @@ impl<VM: VMBinding> GCRequester<VM> {
     pub fn new(scheduler: Arc<GCWorkScheduler<VM>>) -> Self {
         GCRequester {
             request_sync: Mutex::new(RequestSync {
-                request_count: 0,
-                last_request_count: -1,
+                gc_scheduled: true,
             }),
             request_flag: AtomicBool::new(false),
             scheduler,
@@ -38,22 +36,37 @@ impl<VM: VMBinding> GCRequester<VM> {
         }

         let mut guard = self.request_sync.lock().unwrap();
+        // Note: This is the double-checked locking algorithm.
+        // The load has the `Relaxed` order instead of `Acquire` because we only use the flag to
+        // remove successive requests, but we don't use it to synchronize other data fields.
         if !self.request_flag.load(Ordering::Relaxed) {
             self.request_flag.store(true, Ordering::Relaxed);
-            guard.request_count += 1;

-            self.schedule_collection();
+            self.try_schedule_collection(&mut *guard);
         }
     }

     pub fn clear_request(&self) {
-        let guard = self.request_sync.lock().unwrap();
+        let _guard = self.request_sync.lock().unwrap();
         self.request_flag.store(false, Ordering::Relaxed);
-        drop(guard);
     }

-    fn schedule_collection(&self) {
-        // Add a ScheduleCollection work packet. It is the seed of other work packets.
-        self.scheduler.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
+    pub fn on_gc_finished(&self) {
+        let mut guard = self.request_sync.lock().unwrap();
+        guard.gc_scheduled = false;
+
+        self.try_schedule_collection(&mut *guard);
+    }
+
+    fn try_schedule_collection(&self, sync: &mut RequestSync) {
+        if self.request_flag.load(Ordering::Relaxed) && !sync.gc_scheduled {
+            // Add a ScheduleCollection work packet. It is the seed of other work packets.
+            self.scheduler.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
+
+            sync.gc_scheduled = true;
+
+            // Note: We do not clear `request_flag` now. It will be cleared by `clear_request`
+            // after all mutators have stopped.
+        }
     }
 }

From ffe8f33342f167c1662094c22e76198903adac61 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Tue, 2 Jan 2024 19:36:30 +0800
Subject: [PATCH 03/48] Working

---
 src/global_state.rs         |   7 ++
 src/plan/gc_requester.rs    |  30 ++++++--
 src/scheduler/controller.rs |  14 ++--
 src/scheduler/gc_work.rs    |  10 +++
 src/scheduler/scheduler.rs  | 137 ++++++++++++++++++++++++++++++++++--
 src/scheduler/worker.rs     | 114 ++++++++----------------------
 6 files changed, 210 insertions(+), 102 deletions(-)

diff --git a/src/global_state.rs b/src/global_state.rs
index e066811557..2340957eae 100644
--- a/src/global_state.rs
+++ b/src/global_state.rs
@@ -1,5 +1,8 @@
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::Mutex;
+use std::time::Instant;
+
+use atomic_refcell::AtomicRefCell;

 /// This stores some global states for an MMTK instance.
 /// Some MMTK components like plans and allocators may keep an reference to the struct, and can access it.
@@ -40,6 +43,9 @@ pub struct GlobalState {
     pub(crate) stacks_prepared: AtomicBool,
     /// A counter that keeps tracks of the number of bytes allocated since last stress test
     pub(crate) allocation_bytes: AtomicUsize,
+    /// The time when the current GC started. Only accessible in `ScheduleCollection` and
+    /// `GCWorkScheduler::on_gc_finished` which never happen at the same time.
+    pub(crate) gc_start_time: AtomicRefCell<Option<Instant>>,
     /// A counteer that keeps tracks of the number of bytes allocated by malloc
     #[cfg(feature = "malloc_counted_size")]
     pub(crate) malloc_bytes: AtomicUsize,
@@ -214,6 +220,7 @@ impl Default for GlobalState {
             cur_collection_attempts: AtomicUsize::new(0),
             scanned_stacks: AtomicUsize::new(0),
             allocation_bytes: AtomicUsize::new(0),
+            gc_start_time: AtomicRefCell::new(None),
             #[cfg(feature = "malloc_counted_size")]
             malloc_bytes: AtomicUsize::new(0),
             #[cfg(feature = "count_live_bytes_in_gc")]
diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs
index 43f7c5a5ac..53419aa486 100644
--- a/src/plan/gc_requester.rs
+++ b/src/plan/gc_requester.rs
@@ -1,5 +1,4 @@
-use crate::scheduler::gc_work::ScheduleCollection;
-use crate::scheduler::{GCWorkScheduler, WorkBucketStage};
+use crate::scheduler::GCWorkScheduler;
 use crate::vm::VMBinding;
 use std::marker::PhantomData;
 use std::sync::atomic::{AtomicBool, Ordering};
@@ -22,7 +21,7 @@ impl<VM: VMBinding> GCRequester<VM> {
     pub fn new(scheduler: Arc<GCWorkScheduler<VM>>) -> Self {
         GCRequester {
             request_sync: Mutex::new(RequestSync {
-                gc_scheduled: true,
+                gc_scheduled: false,
             }),
             request_flag: AtomicBool::new(false),
             scheduler,
@@ -30,6 +29,8 @@ impl<VM: VMBinding> GCRequester<VM> {
         }
     }

+    /// Request a GC. Called by mutators when polling (during allocation) and when handling user
+    /// GC requests (e.g. `System.gc();` in Java).
     pub fn request(&self) {
         if self.request_flag.load(Ordering::Relaxed) {
             return;
@@ -46,11 +47,28 @@ impl<VM: VMBinding> GCRequester<VM> {
         }
     }

+    /// Returns true if GC has been scheduled.
+    pub fn is_gc_scheduled(&self) -> bool {
+        let guard = self.request_sync.lock().unwrap();
+        guard.gc_scheduled
+    }
+
+    /// Clear the "GC requested" flag so that mutators can trigger the next GC.
+    /// Called by a GC worker when all mutators have come to a stop.
     pub fn clear_request(&self) {
         let _guard = self.request_sync.lock().unwrap();
         self.request_flag.store(false, Ordering::Relaxed);
     }

+    /// Called by a GC worker when a GC has finished.
+    /// This will check the `request_flag` again and schedule the next GC.
+    ///
+    /// Note that this may schedule the next GC immediately if
+    /// 1. The plan is concurrent, and a mutator triggered another GC while the current GC was
+    ///    still running (between `clear_request` and `on_gc_finished`), or
+    /// 2. After the invocation of `resume_mutators`, a mutator runs so fast that it
+    ///    exhausted the heap, or called `handle_user_collection_request`, before this function
+    ///    is called.
     pub fn on_gc_finished(&self) {
         let mut guard = self.request_sync.lock().unwrap();
         guard.gc_scheduled = false;

         self.try_schedule_collection(&mut *guard);
     }

     fn try_schedule_collection(&self, sync: &mut RequestSync) {
+        // Do not schedule collection if a GC is still in progress.
+        // When the GC finishes, a GC worker will call `on_gc_finished` and check `request_flag`
+        // again.
         if self.request_flag.load(Ordering::Relaxed) && !sync.gc_scheduled {
-            // Add a ScheduleCollection work packet. It is the seed of other work packets.
-            self.scheduler.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
+            self.scheduler.schedule_collection();

             sync.gc_scheduled = true;

diff --git a/src/scheduler/controller.rs b/src/scheduler/controller.rs
index acc5426264..125efbbe91 100644
--- a/src/scheduler/controller.rs
+++ b/src/scheduler/controller.rs
@@ -75,17 +75,17 @@ impl<VM: VMBinding> GCController<VM> {
     fn do_gc_until_completion(&mut self) {
         let gc_start = std::time::Instant::now();

-        debug_assert!(
-            self.scheduler.worker_monitor.debug_is_sleeping(),
-            "Workers are still doing work when GC started."
-        );
+        // debug_assert!(
+        //     self.scheduler.worker_monitor.debug_is_sleeping(),
+        //     "Workers are still doing work when GC started."
+        // );

         // Add a ScheduleCollection work packet. It is the seed of other work packets.
         self.scheduler.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);

         // Notify only one worker at this time because there is only one work packet,
         // namely `ScheduleCollection`.
-        self.scheduler.worker_monitor.resume_and_wait(false);
+        // self.scheduler.worker_monitor.resume_and_wait(false);

         // Gradually open more buckets as workers stop each time they drain all open bucket.
         loop {
@@ -102,11 +102,11 @@ impl<VM: VMBinding> GCController<VM> {

             // Notify all workers because there should be many work packets available in the newly
             // opened bucket(s).
-            self.scheduler.worker_monitor.resume_and_wait(true);
+            // self.scheduler.worker_monitor.resume_and_wait(true);
         }

         // All GC workers must have parked by now.
-        debug_assert!(self.scheduler.worker_monitor.debug_is_sleeping());
+        // debug_assert!(self.scheduler.worker_monitor.debug_is_sleeping());
         debug_assert!(!self.scheduler.worker_group.has_designated_work());
         debug_assert!(self.scheduler.all_buckets_empty());

diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs
index 692873eb84..a651437911 100644
--- a/src/scheduler/gc_work.rs
+++ b/src/scheduler/gc_work.rs
@@ -14,6 +14,16 @@ pub struct ScheduleCollection;

 impl<VM: VMBinding> GCWork<VM> for ScheduleCollection {
     fn do_work(&mut self, worker: &mut GCWorker<VM>, mmtk: &'static MMTK<VM>) {
+        // Record the time when GC starts.
+        {
+            let mut guard = mmtk.state.gc_start_time.borrow_mut();
+            let old_time = guard.replace(std::time::Instant::now());
+            debug_assert!(
+                old_time.is_none(),
+                "gc_start_time is still set when GC started: {old_time:?}",
+            );
+        }
+
         // Tell GC trigger that GC started.
         mmtk.gc_trigger.policy.on_gc_start(mmtk);

diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 46357e4a46..3372ff9770 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -1,12 +1,16 @@
+use super::gc_work::ScheduleCollection;
 use super::stat::SchedulerStat;
 use super::work_bucket::*;
-use super::worker::{GCWorker, GCWorkerShared, ThreadId, WorkerGroup, WorkerMonitor};
+use super::worker::{GCWorker, ThreadId, WorkerGroup, WorkerMonitor};
 use super::*;
+use crate::global_state::GcStatus;
 use crate::mmtk::MMTK;
 use crate::util::opaque_pointer::*;
 use crate::util::options::AffinityKind;
 use crate::util::rust_util::array_from_fn;
+use crate::vm::Collection;
 use crate::vm::VMBinding;
+use crate::Plan;
 use crossbeam::deque::Steal;
 use enum_map::{Enum, EnumMap};
 use std::collections::HashMap;
 use std::sync::Arc;
@@ -62,8 +66,6 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
             }
         }

-        let coordinator_worker_shared = Arc::new(GCWorkerShared::<VM>::new(None));
-
         Arc::new(Self {
             work_buckets,
             worker_group,
@@ -86,9 +88,17 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
         self.affinity.resolve_affinity(thread);
     }

+    /// Schedule collection. Called via `GCRequester`.
+    /// Because this function may be called by a mutator thread, we only add a `ScheduleCollection`
+    /// work packet here so that a GC worker can wake up later and actually schedule the work for a
+    /// collection.
+    pub(crate) fn schedule_collection(&self) {
+        // Add a ScheduleCollection work packet. It is the seed of other work packets.
+        self.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
+    }
+
     /// Schedule all the common work packets
     pub fn schedule_common_work<C: GCWorkContext<VM = VM>>(&self, plan: &'static C::PlanType) {
-        use crate::plan::Plan;
         use crate::scheduler::gc_work::*;
         // Stop & scan mutators (mutator scanning can happen before STW)
         self.work_buckets[WorkBucketStage::Unconstrained].add(StopMutators::<C>::new());
@@ -349,8 +359,125 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
                 return work;
             }

-            self.worker_monitor.park_and_wait(worker);
+            self.worker_monitor.park_and_wait(worker, || {
+                // This is the last worker parked.
+
+                // Test whether we are doing GC.
+                if worker.mmtk.gc_requester.is_gc_scheduled() {
+                    trace!("GC is scheduled. Try to find more work to do...");
+                    // We are in the middle of GC, and the last GC worker parked.
+                    // Find more work for workers to do.
+                    let found_more_work = self.find_more_work_for_workers();
+
+                    if !found_more_work {
+                        // GC finished.
+                        self.on_gc_finished(worker);
+                    }
+
+                    found_more_work
+                } else {
+                    trace!("GC is not scheduled. Wait for the first GC.");
+                    // GC is not scheduled. Do nothing.
+                    // Note that when GC worker threads has just been created, they will try to get
+                    // work packets to execute. But since the first GC has not started, yet, there
+                    // is not any work packets to execute, yet. Therefore, all workers will park,
+                    // and the last parked worker will reach here. In that case, we should simply
+                    // let workers wait until the first GC starts, instead of trying to open more
+                    // buckets.
+                    // If a GC worker spuriously wakes up when GC is not scheduled, it should not
+                    // do anything, either.
+                    false
+                }
+            });
         }
     }

+    /// Find more work for workers to do. Return true if more work is available.
+    fn find_more_work_for_workers(&self) -> bool {
+        if self.worker_group.has_designated_work() {
+            return true;
+        }
+
+        // See if any bucket has a sentinel.
+        if self.schedule_sentinels() {
+            return true;
+        }
+
+        // Try to open new buckets.
+        if self.update_buckets() {
+            return true;
+        }
+
+        // If all of the above failed, it means GC has finished.
+        false
+    }
+
+    /// Called when GC has finished, i.e. when all work packets have been executed.
+    fn on_gc_finished(&self, worker: &GCWorker<VM>) {
+        // All GC workers (except this one) must have parked by now.
+        debug_assert!(!self.worker_group.has_designated_work());
+        debug_assert!(self.all_buckets_empty());
+
+        // Deactivate all work buckets to prepare for the next GC.
+        self.deactivate_all();
+        self.debug_assert_all_buckets_deactivated();
+
+        let mmtk = worker.mmtk;
+
+        // Tell GC trigger that GC ended - this happens before we resume mutators.
+        mmtk.gc_trigger.policy.on_gc_end(mmtk);
+
+        // Compute the elapsed time of the GC.
+        let gc_start = {
+            let mut guard = mmtk.state.gc_start_time.borrow_mut();
+            guard.take().expect("gc_start_time was not set")
+        };
+        let elapsed = gc_start.elapsed();
+
+        info!(
+            "End of GC ({}/{} pages, took {} ms)",
+            mmtk.get_plan().get_reserved_pages(),
+            mmtk.get_plan().get_total_pages(),
+            elapsed.as_millis()
+        );
+
+        #[cfg(feature = "count_live_bytes_in_gc")]
+        {
+            let live_bytes = mmtk.state.get_live_bytes_in_last_gc();
+            let used_bytes =
+                mmtk.get_plan().get_used_pages() << crate::util::constants::LOG_BYTES_IN_PAGE;
+            debug_assert!(
+                live_bytes <= used_bytes,
+                "Live bytes of all live objects ({} bytes) is larger than used pages ({} bytes), something is wrong.",
+                live_bytes, used_bytes
+            );
+            info!(
+                "Live objects = {} bytes ({:04.1}% of {} used pages)",
+                live_bytes,
+                live_bytes as f64 * 100.0 / used_bytes as f64,
+                mmtk.get_plan().get_used_pages()
+            );
+        }
+
+        // All other workers are parked, so it is safe to access the Plan instance mutably.
+        let plan_mut: &mut dyn Plan<VM = VM> = unsafe { mmtk.get_plan_mut() };
+        plan_mut.end_of_gc(worker.tls);
+
+        #[cfg(feature = "extreme_assertions")]
+        if crate::util::edge_logger::should_check_duplicate_edges(mmtk.get_plan()) {
+            // reset the logging info at the end of each GC
+            mmtk.edge_logger.reset();
+        }
+
+        // Reset the triggering information.
+        mmtk.state.reset_collection_trigger();
+
+        // Set to NotInGC after everything, and right before resuming mutators.
+        mmtk.set_gc_status(GcStatus::NotInGC);
+        <VM as VMBinding>::VMCollection::resume_mutators(worker.tls);
+
+        // Notify the `GCRequester` that GC has finished.
+        mmtk.gc_requester.on_gc_finished();
     }

     pub fn enable_stat(&self) {
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index e415a14e46..3f483f7200 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -12,7 +12,7 @@ use crossbeam::queue::ArrayQueue;
 #[cfg(feature = "count_live_bytes_in_gc")]
 use std::sync::atomic::AtomicUsize;
 use std::sync::atomic::Ordering;
-use std::sync::{Arc, Condvar, Mutex};
+use std::sync::{Arc, Condvar, Mutex, MutexGuard};

 /// Represents the ID of a GC worker thread.
 pub type ThreadId = usize;
@@ -75,40 +75,14 @@ impl<VM: VMBinding> GCWorkerShared<VM> {
     }
 }

-/// Used to synchronize mutually exclusive operations between workers and controller,
-/// and also waking up workers when more work packets are available.
+/// A data structure for parking and waking up workers.
+/// It keeps track of the number of workers parked, and allows the last parked worker to perform
+/// operations that require all other workers to be parked.
 pub(crate) struct WorkerMonitor {
     /// The synchronized part.
     sync: Mutex<WorkerMonitorSync>,
     /// This is notified when new work is made available for the workers.
-    /// Particularly, it is notified when
-    /// -   `sync.worker_group_state` is transitioned to `Working` because
-    ///     -   some workers still have designated work, or
-    ///     -   some sentinel work packets are added to their drained buckets, or
-    ///     -   some work buckets are opened, or
-    ///     -   any work packet is added to any open bucket.
-    /// Workers wait on this condvar.
     work_available: Condvar,
-    /// This is notified when all workers parked.
-    /// The coordinator waits on this condvar.
-    all_workers_parked: Condvar,
 }

-/// The state of the worker group.
-///
-/// The worker group alternates between the `Sleeping` and the `Working` state. Workers are
-/// allowed to execute work packets in the `Working` state. However, once workers entered the
-/// `Sleeping` state, they will not be allowed to packets from buckets until the coordinator
-/// explicitly transitions the state back to `Working` after it found more work for workers to do.
-#[derive(Clone, Copy, PartialEq, Eq, Debug)]
-enum WorkerGroupState {
-    /// In this state, the coordinator can open new buckets and close buckets,
-    /// but workers cannot execute any packets or get any work packets from any buckets.
-    /// Workers cannot unpark in this state.
-    Sleeping,
-    /// In this state, workers can get work packets from open buckets,
-    /// but no buckets can be opened or closed.
-    Working,
-}
-
 /// The synchronized part of `WorkerMonitor`.
 pub(crate) struct WorkerMonitorSync {
     worker_count: usize,
     /// Number of parked workers.
     parked_workers: usize,
-    /// The worker group state.
-    worker_group_state: WorkerGroupState,
 }

 impl WorkerMonitor {
     pub fn new(worker_count: usize) -> Self {
         Self {
             sync: Mutex::new(WorkerMonitorSync {
                 worker_count,
                 parked_workers: 0,
-                worker_group_state: WorkerGroupState::Sleeping,
             }),
             work_available: Default::default(),
-            all_workers_parked: Default::default(),
         }
     }

     /// Wake up workers when more work packets are made available for workers.
     /// This function is called when adding work packets to buckets.
-    /// This function doesn't change the `work_group_state` variable.
-    /// If workers are in the `Sleeping` state, use `resume_and_wait` to resume workers.
     pub fn notify_work_available(&self, all: bool) {
-        let sync = self.sync.lock().unwrap();
-
-        // Don't notify workers if we are adding packets when workers are sleeping.
-        // This could happen when we add `ScheduleCollection` or schedule sentinels.
-        if sync.worker_group_state == WorkerGroupState::Sleeping {
-            return;
-        }
-
-        if all {
-            self.work_available.notify_all();
-        } else {
-            self.work_available.notify_one();
-        }
+        let mut guard = self.sync.lock().unwrap();
+        self.notify_work_available_inner(all, &mut guard);
     }

-    /// Wake up workers and wait until they transition to `Sleeping` state again.
-    /// This is called by the coordinator.
-    /// If `all` is true, notify all workers; otherwise only notify one worker.
-    pub fn resume_and_wait(&self, all: bool) {
-        let mut sync = self.sync.lock().unwrap();
-        sync.worker_group_state = WorkerGroupState::Working;
+    fn notify_work_available_inner(&self, all: bool, _guard: &mut MutexGuard<WorkerMonitorSync>) {
         if all {
             self.work_available.notify_all();
         } else {
             self.work_available.notify_one();
         }
-        let _sync = self
-            .all_workers_parked
-            .wait_while(sync, |sync| {
-                sync.worker_group_state == WorkerGroupState::Working
-            })
-            .unwrap();
     }

-    /// Test if the worker group is in the `Sleeping` state.
-    pub fn debug_is_sleeping(&self) -> bool {
-        let sync = self.sync.lock().unwrap();
-        sync.worker_group_state == WorkerGroupState::Sleeping
-    }
-
-    /// Park until more work is available.
-    /// The argument `worker` indicates this function can only be called by workers.
-    pub fn park_and_wait<VM: VMBinding>(&self, worker: &GCWorker<VM>) {
+    /// Park a worker until more work is available.
+    /// If it is the last worker parked, `on_last_parked` will be called.
+    /// If `on_last_parked` returns `true`, this function will notify other workers about available
+    /// work before unparking the current worker;
+    /// if `on_last_parked` returns `false`, the current worker will also wait for work packets to
+    /// be added.
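+    ///
+    /// A minimal call-site sketch (illustrative only, not part of this patch; at this point in
+    /// the series the callback returns `bool`, meaning "more work was made available"). This is
+    /// essentially what `GCWorkScheduler::poll_slow` does:
+    ///
+    /// ```ignore
+    /// monitor.park_and_wait(worker, || {
+    ///     // Runs with the `WorkerMonitor` mutex held, while every worker is parked.
+    ///     scheduler.find_more_work_for_workers()
+    /// });
+    /// ```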
+    pub fn park_and_wait<VM, F>(&self, worker: &GCWorker<VM>, on_last_parked: F)
+    where
+        VM: VMBinding,
+        F: FnOnce() -> bool,
+    {
         let mut sync = self.sync.lock().unwrap();

         // Park this worker
         let all_parked = sync.inc_parked_workers();
         trace!("Worker {} parked.", worker.ordinal);

         if all_parked {
-            // If all workers are parked, enter "Sleeping" state and notify controller.
-            sync.worker_group_state = WorkerGroupState::Sleeping;
-            debug!(
-                "Worker {} notifies the coordinator that all workerer parked.",
-                worker.ordinal
-            );
-            self.all_workers_parked.notify_one();
+            debug!("Worker {} is the last worker parked.", worker.ordinal);
+            let more_work_available = on_last_parked();
+            if more_work_available {
+                self.notify_work_available_inner(true, &mut sync);
+            } else {
+                // It didn't make more work available. Keep waiting.
+                // This should happen when
+                // 1. Worker threads have just been created, but GC has not started, yet, or
+                // 2. A GC has just finished.
+                sync = self.work_available.wait(sync).unwrap();
+            }
         } else {
             // Otherwise wait until notified.
             // Note: The condition for this `cond.wait` is "more work is available".
             sync = self.work_available.wait(sync).unwrap();
         }

-        // If we are in the `Sleeping` state, wait until leaving that state.
-        sync = self
-            .work_available
-            .wait_while(sync, |sync| {
-                sync.worker_group_state == WorkerGroupState::Sleeping
-            })
-            .unwrap();
-
         // Unpark this worker.
         sync.dec_parked_workers();
         trace!("Worker {} unparked.", worker.ordinal);
     }
 }

From b64e2a78ecb42e1a60431f9e7473aa1a6f027f37 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Wed, 3 Jan 2024 12:17:20 +0800
Subject: [PATCH 04/48] WIP: Remove Controller

---
 src/memory_manager.rs       |  17 +----
 src/scheduler/controller.rs | 134 ------------------------------------
 src/scheduler/mod.rs        |   3 -
 src/scheduler/scheduler.rs  |  21 +++++-
 src/scheduler/worker.rs     |   8 ++-
 src/vm/collection.rs        |   6 +-
 6 files changed, 29 insertions(+), 160 deletions(-)
 delete mode 100644 src/scheduler/controller.rs

diff --git a/src/memory_manager.rs b/src/memory_manager.rs
index 67135e9678..e5a918233f 100644
--- a/src/memory_manager.rs
+++ b/src/memory_manager.rs
@@ -16,7 +16,7 @@ use crate::mmtk::MMTK;
 use crate::plan::AllocationSemantics;
 use crate::plan::{Mutator, MutatorContext};
 use crate::scheduler::WorkBucketStage;
-use crate::scheduler::{GCController, GCWork, GCWorker};
+use crate::scheduler::{GCWork, GCWorker};
 use crate::util::alloc::allocators::AllocatorSelector;
 use crate::util::constants::{LOG_BYTES_IN_PAGE, MIN_OBJECT_SIZE};
 use crate::util::heap::layout::vm_layout::vm_layout;
@@ -461,21 +461,6 @@ pub fn gc_poll<VM: VMBinding>(mmtk: &MMTK<VM>, tls: VMMutatorThread) {
     }
 }

-/// Run the main loop for the GC controller thread. This method does not return.
-///
-/// Arguments:
-/// * `tls`: The thread that will be used as the GC controller.
-/// * `gc_controller`: The execution context of the GC controller threa.
-///   It is the `GCController` passed to `Collection::spawn_gc_thread`.
-/// * `mmtk`: A reference to an MMTk instance.
-pub fn start_control_collector<VM: VMBinding>(
-    _mmtk: &'static MMTK<VM>,
-    tls: VMWorkerThread,
-    gc_controller: &mut GCController<VM>,
-) {
-    gc_controller.run(tls);
-}
-
 /// Run the main loop of a GC worker. This method does not return.
 ///
 /// Arguments:
diff --git a/src/scheduler/controller.rs b/src/scheduler/controller.rs
deleted file mode 100644
index 125efbbe91..0000000000
--- a/src/scheduler/controller.rs
+++ /dev/null
@@ -1,134 +0,0 @@
-//! The GC controller thread.
-//!
-//! MMTk has many GC threads. There are many GC worker threads and one GC controller thread.
-//! The GC controller thread responds to GC requests and coordinates the workers to perform GC.
-
-use std::sync::Arc;
-
-use crate::plan::gc_requester::GCRequester;
-use crate::scheduler::gc_work::{EndOfGC, ScheduleCollection};
-use crate::scheduler::{GCWork, WorkBucketStage};
-use crate::util::VMWorkerThread;
-use crate::vm::VMBinding;
-use crate::MMTK;
-
-use super::{GCWorkScheduler, GCWorker};
-
-/// The thread local struct for the GC controller, the counterpart of `GCWorker`.
-pub struct GCController<VM: VMBinding> {
-    /// The reference to the MMTk instance.
-    mmtk: &'static MMTK<VM>,
-    /// The reference to the GC requester.
-    requester: Arc<GCRequester<VM>>,
-    /// The reference to the scheduler.
-    scheduler: Arc<GCWorkScheduler<VM>>,
-    /// The `GCWorker` is used to execute packets. The controller is also a `GCWorker`.
-    coordinator_worker: GCWorker<VM>,
-}
-
-impl<VM: VMBinding> GCController<VM> {
-    /// The main loop for the GC controller.
-    pub fn run(&mut self, tls: VMWorkerThread) -> ! {
-        probe!(mmtk, gccontroller_run);
-        // Initialize the GC worker for coordinator. We are not using the run() method from
-        // GCWorker so we manually initialize the worker here.
-        self.coordinator_worker.tls = tls;
-
-        loop {
-            debug!("[STWController: Waiting for request...]");
-            //self.requester.wait_for_request();
-            debug!("[STWController: Request recieved.]");
-
-            self.do_gc_until_completion_traced();
-            debug!("[STWController: Worker threads complete!]");
-        }
-    }
-
-    /// Find more work for workers to do. Return true if more work is available.
-    fn find_more_work_for_workers(&mut self) -> bool {
-        if self.scheduler.worker_group.has_designated_work() {
-            return true;
-        }
-
-        // See if any bucket has a sentinel.
-        if self.scheduler.schedule_sentinels() {
-            return true;
-        }
-
-        // Try to open new buckets.
-        if self.scheduler.update_buckets() {
-            return true;
-        }
-
-        // If all of the above failed, it means GC has finished.
-        false
-    }
-
-    /// A wrapper method for [`do_gc_until_completion`](GCController::do_gc_until_completion) to insert USDT tracepoints.
-    fn do_gc_until_completion_traced(&mut self) {
-        probe!(mmtk, gc_start);
-        self.do_gc_until_completion();
-        probe!(mmtk, gc_end);
-    }
-
-    /// Coordinate workers to perform GC in response to a GC request.
-    fn do_gc_until_completion(&mut self) {
-        let gc_start = std::time::Instant::now();
-
-        // debug_assert!(
-        //     self.scheduler.worker_monitor.debug_is_sleeping(),
-        //     "Workers are still doing work when GC started."
-        // );
-
-        // Add a ScheduleCollection work packet. It is the seed of other work packets.
-        self.scheduler.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
-
-        // Notify only one worker at this time because there is only one work packet,
-        // namely `ScheduleCollection`.
-        // self.scheduler.worker_monitor.resume_and_wait(false);
-
-        // Gradually open more buckets as workers stop each time they drain all open bucket.
-        loop {
-            // Workers should only transition to the `Sleeping` state when all open buckets have
-            // been drained.
-            self.scheduler.assert_all_activated_buckets_are_empty();
-
-            let new_work_available = self.find_more_work_for_workers();
-
-            // GC finishes if there is no new work to do.
-            if !new_work_available {
-                break;
-            }
-
-            // Notify all workers because there should be many work packets available in the newly
-            // opened bucket(s).
-            // self.scheduler.worker_monitor.resume_and_wait(true);
-        }
-
-        // All GC workers must have parked by now.
-        // debug_assert!(self.scheduler.worker_monitor.debug_is_sleeping());
-        debug_assert!(!self.scheduler.worker_group.has_designated_work());
-        debug_assert!(self.scheduler.all_buckets_empty());
-
-        // Deactivate all work buckets to prepare for the next GC.
-        // NOTE: There is no need to hold any lock.
-        // Workers are in the `Sleeping` state.
-        // so they will not wake up while we deactivate buckets.
-        self.scheduler.deactivate_all();
-
-        // Tell GC trigger that GC ended - this happens before EndOfGC where we resume mutators.
-        self.mmtk.gc_trigger.policy.on_gc_end(self.mmtk);
-
-        // Finalization: Resume mutators, reset gc states
-        // Note: Resume-mutators must happen after all work buckets are closed.
-        //       Otherwise, for generational GCs, workers will receive and process
-        //       newly generated remembered-sets from those open buckets.
-        //       But these remsets should be preserved until next GC.
-        let mut end_of_gc = EndOfGC {
-            elapsed: gc_start.elapsed(),
-        };
-        end_of_gc.do_work_with_stat(&mut self.coordinator_worker, self.mmtk);
-
-        self.scheduler.debug_assert_all_buckets_deactivated();
-    }
-}
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs
index 03b7d822bc..45501e9943 100644
--- a/src/scheduler/mod.rs
+++ b/src/scheduler/mod.rs
@@ -20,8 +20,5 @@ mod worker;
 pub(crate) use worker::current_worker_ordinal;
 pub use worker::GCWorker;

-mod controller;
-pub use controller::GCController;
-
 pub(crate) mod gc_work;
 pub use gc_work::ProcessEdgesWork;
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 3372ff9770..9fc8fc18fd 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -276,7 +276,7 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
     }

     /// Check if all the work buckets are empty
-    pub(crate) fn assert_all_activated_buckets_are_empty(&self) {
+    pub(crate) fn assert_all_activated_buckets_are_empty(&self, worker: &GCWorker<VM>) {
         let mut error_example = None;
         for (id, bucket) in self.work_buckets.iter() {
             if bucket.is_activated() && !bucket.is_empty() {
                 // we should show at least one abnormal bucket in the panic message
                 // so that we still have some information for debugging.
                 error_example = Some(id);
+
+                while !bucket.is_empty() {
+                    match bucket.poll(&worker.local_work_buffer) {
+                        Steal::Success(w) => {
+                            error!("  Bucket {:?} has {:?}", id, w.get_type_name());
+                        },
+                        Steal::Retry => continue,
+                        _ => {}
+                    }
+                }
             }
         }
         if let Some(id) = error_example {
@@ -364,8 +374,12 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {

                 // Test whether we are doing GC.
                 if worker.mmtk.gc_requester.is_gc_scheduled() {
-                    trace!("GC is scheduled. Try to find more work to do...");
                     // We are in the middle of GC, and the last GC worker parked.
+                    trace!("GC is scheduled. Try to find more work to do...");
+
+                    // During GC, if all workers parked, all open buckets must have been drained.
+                    self.assert_all_activated_buckets_are_empty(worker);
+
                     // Find more work for workers to do.
                     let found_more_work = self.find_more_work_for_workers();

     /// Find more work for workers to do. Return true if more work is available.
     fn find_more_work_for_workers(&self) -> bool {
         if self.worker_group.has_designated_work() {
+            trace!("Some workers have designated work.");
             return true;
         }

         // See if any bucket has a sentinel.
         if self.schedule_sentinels() {
+            trace!("Some sentinels are scheduled.");
             return true;
         }

         // Try to open new buckets.
         if self.update_buckets() {
+            trace!("Some buckets are opened.");
             return true;
         }

diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index 3f483f7200..dead43a048 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -134,7 +134,13 @@ impl WorkerMonitor {

         // Park this worker
         let all_parked = sync.inc_parked_workers();
-        trace!("Worker {} parked.", worker.ordinal);
+        trace!(
+            "Worker {} parked. parked/total: {}/{}. All parked: {}",
+            worker.ordinal,
+            sync.parked_workers,
+            sync.worker_count,
+            all_parked
+        );

         if all_parked {
             debug!("Worker {} is the last worker parked.", worker.ordinal);
diff --git a/src/vm/collection.rs b/src/vm/collection.rs
index 5420224ab0..7fe17ccdfd 100644
--- a/src/vm/collection.rs
+++ b/src/vm/collection.rs
@@ -4,9 +4,8 @@ use crate::vm::VMBinding;
 use crate::{scheduler::*, Mutator};

 /// Thread context for the spawned GC thread. It is used by spawn_gc_thread.
+/// Currently, `GCWorker` is the only kind of thread that mmtk-core will create.
 pub enum GCThreadContext<VM: VMBinding> {
-    /// The GC thread to spawn is a controller thread. There is only one controller thread.
-    Controller(Box<GCController<VM>>),
     /// The GC thread to spawn is a worker thread. There can be multiple worker threads.
     Worker(Box<GCWorker<VM>>),
 }
@@ -53,10 +52,9 @@ pub trait Collection<VM: VMBinding> {
     /// * `tls`: The thread pointer for the parent thread that we spawn new threads from. This is the same `tls` when the VM
     ///   calls `initialize_collection()` and passes as an argument.
     /// * `ctx`: The context for the GC thread.
-    ///   * If `Controller` is passed, it means spawning a thread to run as the GC controller.
-    ///     The spawned thread shall call `memory_manager::start_control_collector`.
     ///   * If `Worker` is passed, it means spawning a thread to run as a GC worker.
     ///     The spawned thread shall call `memory_manager::start_worker`.
+    ///   Currently `Worker` is the only kind of thread which mmtk-core will create.
     ///   In either case, the `Box` inside should be passed back to the called function.
     fn spawn_gc_thread(tls: VMThread, ctx: GCThreadContext<VM>);

From 4fa8fac5bf7f6a73750157fe076bc0104d7b67d3 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Wed, 3 Jan 2024 17:24:55 +0800
Subject: [PATCH 05/48] Fix deadlock between GCRequester and GC workers.

---
 src/global_state.rs          | 20 +++++++++---
 src/mmtk.rs                  | 14 ++++-----
 src/plan/gc_requester.rs     | 59 +++++++++++++++++++++---------------
 src/scheduler/scheduler.rs   | 45 +++++++++++++++++++++------
 src/scheduler/work_bucket.rs | 22 +++++++++++---
 src/scheduler/worker.rs      | 46 +++++++++++++++++++++-------
 6 files changed, 145 insertions(+), 61 deletions(-)

diff --git a/src/global_state.rs b/src/global_state.rs
index 2340957eae..55dbab7b3b 100644
--- a/src/global_state.rs
+++ b/src/global_state.rs
@@ -1,8 +1,9 @@
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
-use std::sync::Mutex;
 use std::time::Instant;

+use atomic::Atomic;
 use atomic_refcell::AtomicRefCell;
+use bytemuck::NoUninit;

 /// This stores some global states for an MMTK instance.
 /// Some MMTK components like plans and allocators may keep an reference to the struct, and can access it.
@@ -20,7 +21,7 @@ pub struct GlobalState {
     /// bindings to temporarily disable GC, at which point, we do not trigger GC even if the heap is full.
     pub(crate) trigger_gc_when_heap_is_full: AtomicBool,
     /// The current GC status.
-    pub(crate) gc_status: Mutex<GcStatus>,
+    pub(crate) gc_status: Atomic<GcStatus>,
     /// Is the current GC an emergency collection? Emergency means we may run out of memory soon, and we should
     /// attempt to collect as much as we can.
     pub(crate) emergency_collection: AtomicBool,
@@ -209,7 +210,7 @@ impl Default for GlobalState {
         Self {
             initialized: AtomicBool::new(false),
             trigger_gc_when_heap_is_full: AtomicBool::new(true),
-            gc_status: Mutex::new(GcStatus::NotInGC),
+            gc_status: Atomic::new(GcStatus::NotInGC),
             stacks_prepared: AtomicBool::new(false),
             emergency_collection: AtomicBool::new(false),
             user_triggered_collection: AtomicBool::new(false),
     }
 }

-#[derive(PartialEq)]
+/// GC status, an indicator of whether MMTk is in the progress of a GC or not.
+///
+/// This type was only used for assertions in JikesRVM. After we removed the coordinator (a.k.a.
+/// controller) thread, we use the GC status to decide whether GC workers should open new work
+/// buckets when the last worker parked.
+// FIXME: `GcProper` is inherited from JikesRVM, but it is not used in MMTk core. Consider
+// removing it.
+#[derive(PartialEq, Clone, Copy, NoUninit, Debug)]
+#[repr(u8)]
 pub enum GcStatus {
+    /// Not in GC
     NotInGC,
+    /// GC has started, but not all stacks have been scanned, yet.
     GcPrepare,
+    /// GC has started, and all stacks have been scanned.
     GcProper,
 }
diff --git a/src/mmtk.rs b/src/mmtk.rs
index 3d91f23ae8..9c433fa06d 100644
--- a/src/mmtk.rs
+++ b/src/mmtk.rs
@@ -255,15 +255,15 @@ impl<VM: VMBinding> MMTK<VM> {
         self.inside_sanity.load(Ordering::Relaxed)
     }

-    pub(crate) fn set_gc_status(&self, s: GcStatus) {
-        let mut gc_status = self.state.gc_status.lock().unwrap();
-        if *gc_status == GcStatus::NotInGC {
+    pub(crate) fn set_gc_status(&self, new_status: GcStatus) {
+        let old_status = self.state.gc_status.swap(new_status, Ordering::SeqCst);
+        debug_assert_ne!(old_status, new_status);
+        if old_status == GcStatus::NotInGC {
             self.state.stacks_prepared.store(false, Ordering::SeqCst);
             // FIXME stats
             self.stats.start_gc();
         }
-        *gc_status = s;
-        if *gc_status == GcStatus::NotInGC {
+        if new_status == GcStatus::NotInGC {
             // FIXME stats
             if self.stats.get_gathering_stats() {
                 self.stats.end_gc();
@@ -273,12 +273,12 @@ impl<VM: VMBinding> MMTK<VM> {

     /// Return true if a collection is in progress.
     pub fn gc_in_progress(&self) -> bool {
-        *self.state.gc_status.lock().unwrap() != GcStatus::NotInGC
+        self.state.gc_status.load(Ordering::SeqCst) != GcStatus::NotInGC
     }

     /// Return true if a collection is in progress and past the preparatory stage.
     pub fn gc_in_progress_proper(&self) -> bool {
-        *self.state.gc_status.lock().unwrap() == GcStatus::GcProper
+        self.state.gc_status.load(Ordering::SeqCst) == GcStatus::GcProper
     }

     /// Return true if the current GC is an emergency GC.
diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs
index 53419aa486..03d54889df 100644
--- a/src/plan/gc_requester.rs
+++ b/src/plan/gc_requester.rs
@@ -5,13 +5,17 @@ use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::{Arc, Mutex};

 struct RequestSync {
-    /// Is GC scheduled (but not finished)?
+    /// Has the GCRequester called `GCWorkScheduler::schedule_collection` for the current request?
+    /// This flag exists so that once `GCRequester` called `GCWorkScheduler::schedule_collection`,
+    /// it cannot call it again until the GC it initiated finished.
     gc_scheduled: bool,
 }

 /// This data structure lets mutators trigger GC, and may schedule collection when appropriate.
 pub struct GCRequester<VM> {
     request_sync: Mutex<RequestSync>,
+    /// An atomic flag outside `RequestSync` so that mutators can check if GC has already been
+    /// requested in `poll` without acquiring the mutex.
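+    /// Loading this flag is the fast path of the double-checked locking pattern used in
+    /// `request()` below.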
     request_flag: AtomicBool,
     scheduler: Arc<GCWorkScheduler<VM>>,
     phantom: PhantomData<VM>,
 }
@@ -36,18 +40,23 @@ impl<VM: VMBinding> GCRequester<VM> {
     /// Request a GC. Called by mutators when polling (during allocation) and when handling user
     /// GC requests (e.g. `System.gc();` in Java).
     pub fn request(&self) {
+        // Note: This is the double-checked locking algorithm.
+        // The load has the `Relaxed` order instead of `Acquire` because we are not doing lazy
+        // initialization here. We are only using this flag to remove successive requests.
         if self.request_flag.load(Ordering::Relaxed) {
             return;
         }

         let mut guard = self.request_sync.lock().unwrap();
-        // Note: This is the double-checked locking algorithm.
-        // The load has the `Relaxed` order instead of `Acquire` because we only use the flag to
-        // remove successive requests, but we don't use it to synchronize other data fields.
         if !self.request_flag.load(Ordering::Relaxed) {
             self.request_flag.store(true, Ordering::Relaxed);

-            self.try_schedule_collection(&mut *guard);
+            let should_schedule_gc = self.try_schedule_collection(&mut guard);
+            if should_schedule_gc {
+                self.scheduler.mutator_schedule_collection();

+                // Note: We do not clear `request_flag` now. It will be cleared by `clear_request`
+                // after all mutators have stopped.
+            }
         }
     }

     /// Called by a GC worker when a GC has finished.
-    /// This will check the `request_flag` again and schedule the next GC.
-    ///
-    /// Note that this may schedule the next GC immediately if
-    /// 1. The plan is concurrent, and a mutator triggered another GC while the current GC was
-    ///    still running (between `clear_request` and `on_gc_finished`), or
-    /// 2. After the invocation of `resume_mutators`, a mutator runs so fast that it
-    ///    exhausted the heap, or called `handle_user_collection_request`, before this function
-    ///    is called.
-    pub fn on_gc_finished(&self) {
+    /// This will check the `request_flag` again and check if we should immediately schedule the
+    /// next GC. If we should, `gc_scheduled` will be set back to `true` and this function will
+    /// return `true`.
+    pub fn on_gc_finished(&self) -> bool {
         let mut guard = self.request_sync.lock().unwrap();
         guard.gc_scheduled = false;

-        self.try_schedule_collection(&mut *guard);
+        self.try_schedule_collection(&mut guard)
     }

-    fn try_schedule_collection(&self, sync: &mut RequestSync) {
-        // Do not schedule collection if a GC is still in progress.
-        // When the GC finishes, a GC worker will call `on_gc_finished` and check `request_flag`
-        // again.
+    /// Decide whether we should schedule a new collection. Will transition the state of
+    /// `gc_scheduled` from `false` to `true` if we should schedule a new collection.
+    /// Return `true` if the state transition happens.
+    fn try_schedule_collection(&self, sync: &mut RequestSync) -> bool {
+        // The time to schedule a collection is when `request_flag` is `true` but `gc_scheduled`
+        // is `false`. `gc_scheduled` is `true` if either
+        //
+        // 1. another mutator called `request()` concurrently and scheduled a collection, or
+        // 2. a new GC is requested while the current GC is still in progress.
+        //
+        // If `gc_scheduled` is `true` when GC is requested, we do nothing now. But when the
+        // current GC finishes, a GC worker will call `on_gc_finished` which clears the
+        // `gc_scheduled` flag, and checks the `request_flag` again to trigger the next GC.
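+        //
+        // Summary of (request_flag, gc_scheduled) at this point and the action taken:
+        //   (true,  false) -> schedule the next GC now and set `gc_scheduled` (return true)
+        //   (true,  true ) -> do nothing; `on_gc_finished` will re-check `request_flag` later
+        //   (false, _    ) -> no request pending; do nothing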
         if self.request_flag.load(Ordering::Relaxed) && !sync.gc_scheduled {
             sync.gc_scheduled = true;
+            true
+        } else {
+            false
         }
     }
 }
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 9fc8fc18fd..0f035d7a93 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -5,6 +5,7 @@
 use super::worker::{GCWorker, ThreadId, WorkerGroup, WorkerMonitor};
 use super::*;
 use crate::global_state::GcStatus;
 use crate::mmtk::MMTK;
+use crate::scheduler::worker::LastParkedResult;
 use crate::util::opaque_pointer::*;
 use crate::util::options::AffinityKind;
 use crate::util::rust_util::array_from_fn;
@@ -89,10 +90,11 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
     }

     /// Schedule collection. Called via `GCRequester`.
-    /// Because this function may be called by a mutator thread, we only add a `ScheduleCollection`
+    /// Because this function is called by a mutator thread, we only add a `ScheduleCollection`
     /// work packet here so that a GC worker can wake up later and actually schedule the work for a
     /// collection.
-    pub(crate) fn schedule_collection(&self) {
+    pub(crate) fn mutator_schedule_collection(&self) {
+        debug!("Adding ScheduleCollection work packet upon mutator request.");
         // Add a ScheduleCollection work packet. It is the seed of other work packets.
         self.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
     }
@@ -373,7 +375,7 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
                 // This is the last worker parked.

                 // Test whether we are doing GC.
-                if worker.mmtk.gc_requester.is_gc_scheduled() {
+                if worker.mmtk.gc_in_progress() {
                     // We are in the middle of GC, and the last GC worker parked.
                     trace!("GC is scheduled. Try to find more work to do...");
@@ -383,12 +385,17 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
                     // Find more work for workers to do.
                     let found_more_work = self.find_more_work_for_workers();

-                    if !found_more_work {
+                    if found_more_work {
+                        LastParkedResult::WakeAll
+                    } else {
                         // GC finished.
-                        self.on_gc_finished(worker);
+                        let scheduled_next_gc = self.on_gc_finished(worker);
+                        if scheduled_next_gc {
+                            LastParkedResult::WakeSelf
+                        } else {
+                            LastParkedResult::ParkSelf
+                        }
                     }
-
-                    found_more_work
                 } else {
                     trace!("GC is not scheduled. Wait for the first GC.");
                     // GC is not scheduled. Do nothing.
                     // buckets.
                     // If a GC worker spuriously wakes up when GC is not scheduled, it should not
                     // do anything, either.
-                    false
+                    LastParkedResult::ParkSelf
                 }
             });
         }
@@ -430,7 +437,8 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
     }

     /// Called when GC has finished, i.e. when all work packets have been executed.
-    fn on_gc_finished(&self, worker: &GCWorker<VM>) {
+    /// Return `true` if it scheduled the next GC immediately.
+    fn on_gc_finished(&self, worker: &GCWorker<VM>) -> bool {
         // All GC workers (except this one) must have parked by now.

         mmtk.set_gc_status(GcStatus::NotInGC);
         <VM as VMBinding>::VMCollection::resume_mutators(worker.tls);

         // Notify the `GCRequester` that GC has finished.
-        mmtk.gc_requester.on_gc_finished();
+        let should_schedule_gc_now = mmtk.gc_requester.on_gc_finished();
+        if should_schedule_gc_now {
+            // We should schedule the next GC immediately. This means GC was triggered between
+            // `clear_request` (when stacks were scanned) and `on_gc_finished` (right above). This
+            // can happen if
+            // 1. It is concurrent GC, and a mutator triggered another GC while the current GC was
+            //    still running, or
+            // 2. It is STW GC, but after the invocation of `resume_mutators` above, one mutator
+            //    ran so fast that it triggered a GC before we called `on_gc_finished`.
+            // Note that we are holding the `WorkerMonitor` mutex, and cannot notify workers now.
+            // When this function returns, the current worker should continue to execute the newly
+            // added `ScheduleCollection` work packet.
+            debug!("GC already requested before `on_gc_finished`. Add ScheduleCollection now.");
+            self.work_buckets[WorkBucketStage::Unconstrained].add_no_notify(ScheduleCollection);
+            true
+        } else {
+            false
+        }
     }

     pub fn enable_stat(&self) {
diff --git a/src/scheduler/work_bucket.rs b/src/scheduler/work_bucket.rs
index d6c6cb6545..d7076354eb 100644
--- a/src/scheduler/work_bucket.rs
+++ b/src/scheduler/work_bucket.rs
@@ -139,6 +139,19 @@ impl<VM: VMBinding> WorkBucket<VM> {
         self.notify_one_worker();
     }

+    /// Add a work packet to this bucket, but do not notify any workers.
+    /// This is useful when the current thread is holding the mutex of `WorkerMonitor` which is
+    /// used for notifying workers. This usually happens if the current thread is the last worker
+    /// parked.
+    pub(crate) fn add_no_notify<W: GCWork<VM>>(&self, work: W) {
+        self.queue.push(Box::new(work));
+    }
+
+    /// Like `add_no_notify`, but the work is boxed.
+    pub(crate) fn add_boxed_no_notify(&self, work: Box<dyn GCWork<VM>>) {
+        self.queue.push(work);
+    }
+
     /// Add multiple packets with a higher priority.
     /// Panic if this bucket cannot receive prioritized packets.
     pub fn bulk_add_prioritized(&self, work_vec: Vec<Box<dyn GCWork<VM>>>) {
@@ -210,11 +223,10 @@ impl<VM: VMBinding> WorkBucket<VM> {
             sentinel.take()
         };
         if let Some(work) = maybe_sentinel {
-            // We don't need to call `self.add` because this function is called by the coordinator
-            // when workers are stopped. We don't need to notify the workers because the
-            // coordinator will do that later.
-            // We can just "sneak" the sentinel work packet into the current bucket.
-            self.queue.push(work);
+            // We don't need to notify other workers because this function is called by the last
+            // parked worker. After this function returns, the caller will notify workers because
+            // more work packets become available.
+            self.add_boxed_no_notify(work);
             true
         } else {
             false
         }
     }
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index dead43a048..764f9f649d 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -75,6 +75,17 @@ impl<VM: VMBinding> GCWorkerShared<VM> {
     }
 }

+/// The result type of the `on_last_parked` call-back in `WorkerMonitor::park_and_wait`.
+/// It decides how many workers should wake up after `on_last_parked`.
+pub(crate) enum LastParkedResult {
+    /// The last parked worker should wait, too, until more work packets are added.
+    ParkSelf,
+    /// The last parked worker should unpark and find work packet to do.
+    WakeSelf,
+    /// Wake up all parked GC workers.
+    WakeAll,
+}
+
 /// A data structure for parking and waking up workers.
 /// It keeps track of the number of workers parked, and allows the last parked worker to perform
 /// operations that require all other workers to be parked.
@@ -128,7 +139,7 @@ impl WorkerMonitor { pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F) where VM: VMBinding, - F: FnOnce() -> bool, + F: FnOnce() -> LastParkedResult, { let mut sync = self.sync.lock().unwrap(); @@ -144,15 +155,23 @@ impl WorkerMonitor { if all_parked { debug!("Worker {} is the last worker parked.", worker.ordinal); - let more_work_available = on_last_parked(); - if more_work_available { - self.notify_work_available_inner(true, &mut sync); - } else { - // It didn't make more work available. Keep waiting. - // This should happen when - // 1. Worker threads have just been created, but GC has not started, yet, or - // 2. A GC has just finished. - sync = self.work_available.wait(sync).unwrap(); + let result = on_last_parked(); + match result { + LastParkedResult::ParkSelf => { + // It didn't make more work available. Keep waiting. + // This should happen when + // 1. Worker threads have just been created, but GC has not started, yet, or + // 2. A GC has just finished. + sync = self.work_available.wait(sync).unwrap(); + } + LastParkedResult::WakeSelf => { + // Continue without waiting. + // This should happen when only one work packet is made available. + } + LastParkedResult::WakeAll => { + // Notify all GC workers. + self.notify_work_available_inner(true, &mut sync); + } } } else { // Otherwise wait until notified. @@ -165,7 +184,12 @@ impl WorkerMonitor { // Unpark this worker. sync.dec_parked_workers(); - trace!("Worker {} unparked.", worker.ordinal); + trace!( + "Worker {} unparked. parked/total: {}/{}.", + worker.ordinal, + sync.parked_workers, + sync.worker_count, + ); } } From 8f8c3e008cb99781278d70e5354f7f52f5108c6c Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 4 Jan 2024 12:11:25 +0800 Subject: [PATCH 06/48] Remove comments and add back trace points --- docs/header/mmtk.h | 3 --- src/scheduler/gc_work.rs | 3 +++ src/scheduler/scheduler.rs | 20 +++++++++++--------- src/scheduler/worker.rs | 21 ++------------------- src/util/options.rs | 2 +- src/vm/collection.rs | 3 +-- tools/tracing/README.md | 1 - 7 files changed, 18 insertions(+), 35 deletions(-) diff --git a/docs/header/mmtk.h b/docs/header/mmtk.h index 7fb6949d6b..3985bee06a 100644 --- a/docs/header/mmtk.h +++ b/docs/header/mmtk.h @@ -84,9 +84,6 @@ extern void mmtk_scan_region(); // Request MMTk to trigger a GC. Note that this may not actually trigger a GC extern void mmtk_handle_user_collection_request(void* tls); -// Run the main loop for the GC controller thread. Does not return -extern void mmtk_start_control_collector(void* tls, void* worker); - // Run the main loop for a GC worker. Does not return extern void mmtk_start_worker(void* tls, void* worker); diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs index a651437911..2be49a06eb 100644 --- a/src/scheduler/gc_work.rs +++ b/src/scheduler/gc_work.rs @@ -14,6 +14,9 @@ pub struct ScheduleCollection; impl GCWork for ScheduleCollection { fn do_work(&mut self, worker: &mut GCWorker, mmtk: &'static MMTK) { + // This is officially when a GC starts. + probe!(mmtk, gc_start); + // Record the time when GC starts. { let mut guard = mmtk.state.gc_start_time.borrow_mut(); diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 0f035d7a93..c81ea280ed 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -79,7 +79,7 @@ impl GCWorkScheduler { self.worker_group.as_ref().worker_count() } - /// Create GC threads, including the controller thread and all workers. 
+    /// Create GC threads, including all workers.
     pub fn spawn_gc_threads(self: &Arc, mmtk: &'static MMTK, tls: VMThread) {
         self.worker_group.spawn(mmtk, tls)
     }
@@ -501,6 +501,9 @@ impl GCWorkScheduler {
         mmtk.set_gc_status(GcStatus::NotInGC);
         ::VMCollection::resume_mutators(worker.tls);
 
+        // GC officially ends here.
+        probe!(mmtk, gc_end);
+
         // Notify the `GCRequester` that GC has finished.
         let should_schedule_gc_now = mmtk.gc_requester.on_gc_finished();
         if should_schedule_gc_now {
@@ -542,14 +545,13 @@ impl GCWorkScheduler {
             mmtk.gc_requester.clear_request();
             let first_stw_bucket = &self.work_buckets[WorkBucketStage::first_stw_stage()];
             debug_assert!(!first_stw_bucket.is_activated());
-            // Note: This is the only place where a non-coordinator thread opens a bucket.
-            // If the `StopMutators` is executed by the coordinator thread, it will open
-            // the `Prepare` bucket and let workers start executing packets while the coordinator
-            // can still add more work packets to `Prepare`. However, since `Prepare` is the first STW
-            // bucket and only the coordinator can open any subsequent buckets, workers cannot execute
-            // work packets out of order. This is not generally true if we are not opening the first
-            // STW bucket. In the future, we should redesign the opening condition of work buckets to
-            // make the synchronization more robust,
+            // Note: This is the only place where a bucket is opened without having all workers parked.
+            // We usually require all workers to park before opening new buckets because otherwise
+            // packets will be executed out of order. However, since `Prepare` is the first STW
+            // bucket, and all subsequent buckets require all workers to park before opening, workers
+            // cannot execute work packets out of order. This is not generally true if we are not
+            // opening the first STW bucket. In the future, we should redesign the opening condition
+            // of work buckets to make the synchronization more robust.
             first_stw_bucket.activate();
             self.worker_monitor.notify_work_available(true);
         }
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index 764f9f649d..bd5367f987 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -44,10 +44,6 @@ pub struct GCWorkerShared {
     #[cfg(feature = "count_live_bytes_in_gc")]
     live_bytes: AtomicUsize,
     /// A queue of GCWork that can only be processed by the owned thread.
-    ///
-    /// Note: Currently, designated work cannot be added from the GC controller thread, or
-    /// there will be synchronization problems. If it is necessary to do so, we need to
-    /// update the code in `GCWorkScheduler::poll_slow` for proper synchornization.
     pub designated_work: ArrayQueue>>,
     /// Handle for stealing packets from the current worker
     pub stealer: Option>>>,
@@ -218,12 +214,10 @@ impl WorkerMonitorSync {
 }
 
 /// A GC worker. This part is privately owned by a worker thread.
-/// The GC controller also has an embedded `GCWorker` because it may also execute work packets.
 pub struct GCWorker {
     /// The VM-specific thread-local state of the GC thread.
     pub tls: VMWorkerThread,
-    /// The ordinal of the worker, numbered from 0 to the number of workers minus one. The ordinal
-    /// is usize::MAX if it is the embedded worker of the GC controller thread.
+    /// The ordinal of the worker, numbered from 0 to the number of workers minus one.
     pub ordinal: ThreadId,
     /// The reference to the scheduler.
     scheduler: Arc>,
     /// The copy context, used to implement copying GC.
     copy: GCWorkerCopyContext,
     /// The reference to the MMTk instance.
pub mmtk: &'static MMTK, - /// True if this struct is the embedded GCWorker of the controller thread. - /// False if this struct belongs to a standalone GCWorker thread. - is_coordinator: bool, /// Reference to the shared part of the GC worker. It is used for synchronization. pub shared: Arc>, /// Local work packet queue. @@ -262,7 +253,6 @@ impl GCWorker { mmtk: &'static MMTK, ordinal: ThreadId, scheduler: Arc>, - is_coordinator: bool, shared: Arc>, local_work_buffer: deque::Worker>>, ) -> Self { @@ -273,7 +263,6 @@ impl GCWorker { copy: GCWorkerCopyContext::new_non_copy(), scheduler, mmtk, - is_coordinator, shared, local_work_buffer, } @@ -307,11 +296,6 @@ impl GCWorker { self.local_work_buffer.push(Box::new(work)); } - /// Is this worker a coordinator or a normal GC worker? - pub fn is_coordinator(&self) -> bool { - self.is_coordinator - } - /// Get the scheduler. There is only one scheduler per MMTk instance. pub fn scheduler(&self) -> &GCWorkScheduler { &self.scheduler @@ -364,7 +348,7 @@ impl GCWorker { } } -/// A worker group to manage all the GC workers (except the coordinator worker). +/// A worker group to manage all the GC workers. pub(crate) struct WorkerGroup { /// Shared worker data pub workers_shared: Vec>>, @@ -401,7 +385,6 @@ impl WorkerGroup { mmtk, ordinal, mmtk.scheduler.clone(), - false, shared.clone(), unspawned_local_work_queues.pop().unwrap(), )); diff --git a/src/util/options.rs b/src/util/options.rs index 11a1b329ae..eb6513e1c3 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -705,7 +705,7 @@ mod gc_trigger_tests { options! { /// The GC plan to use. plan: PlanSelector [env_var: true, command_line: true] [always_valid] = PlanSelector::GenImmix, - /// Number of GC worker threads. (There is always one GC controller thread besides the GC workers) + /// Number of GC worker threads. // FIXME: Currently we create GCWorkScheduler when MMTK is created, which is usually static. // To allow this as a command-line option, we need to refactor the creation fo the `MMTK` instance. // See: https://github.com/mmtk/mmtk-core/issues/532 diff --git a/src/vm/collection.rs b/src/vm/collection.rs index 7fe17ccdfd..b3b172ded0 100644 --- a/src/vm/collection.rs +++ b/src/vm/collection.rs @@ -30,8 +30,7 @@ pub trait Collection { /// This method may not be called by the same GC thread that called `stop_all_mutators`. /// /// Arguments: - /// * `tls`: The thread pointer for the GC worker. Currently it is the tls of the embedded `GCWorker` instance - /// of the coordinator thread, but it is subject to change, and should not be depended on. + /// * `tls`: The thread pointer for the GC worker. fn resume_mutators(tls: VMWorkerThread); /// Block the current thread for GC. This is called when an allocation request cannot be fulfilled and a GC diff --git a/tools/tracing/README.md b/tools/tracing/README.md index 23a633dd21..1c8598abe0 100644 --- a/tools/tracing/README.md +++ b/tools/tracing/README.md @@ -15,7 +15,6 @@ Currently, the core provides the following tracepoints. 
- `mmtk:collection_initialized()`: GC is enabled
 - `mmtk:harness_begin()`: the timing iteration of a benchmark begins
 - `mmtk:harness_end()`: the timing iteration of a benchmark ends
-- `mmtk:gccontroller_run()`: the GC controller thread enters its work loop
 - `mmtk:gcworker_run()`: a GC worker thread enters its work loop
 - `mmtk:gc_start()`: a collection epoch starts
 - `mmtk:gc_end()`: a collection epoch ends

From 845eea6f0d5ca1d1732071ab82fc13d6deb46b69 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Fri, 5 Jan 2024 00:05:20 +0800
Subject: [PATCH 07/48] WIP: Updating comments and minor code change.

Found a case where workers may sleep forever. This needs to be fixed later.

---
 Cargo.toml                 |  1 +
 src/global_state.rs        |  7 +++--
 src/scheduler/scheduler.rs | 54 ++++++++++++++------------------
 src/scheduler/worker.rs    | 63 ++++++++++++++++++++++++++------------
 src/util/options.rs        |  3 --
 5 files changed, 73 insertions(+), 55 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 558f820701..3461c25835 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -50,6 +50,7 @@ static_assertions = "1.1.0"
 strum = "0.25"
 strum_macros = "0.25"
 sysinfo = "0.29"
+rand = "0.8.5"
 
 [dev-dependencies]
 paste = "1.0.8"
diff --git a/src/global_state.rs b/src/global_state.rs
index 55dbab7b3b..4d57fd2e23 100644
--- a/src/global_state.rs
+++ b/src/global_state.rs
@@ -44,8 +44,11 @@ pub struct GlobalState {
     pub(crate) stacks_prepared: AtomicBool,
     /// A counter that keeps tracks of the number of bytes allocated since last stress test
     pub(crate) allocation_bytes: AtomicUsize,
-    /// The time when the current GC started. Only accessible in `ScheduleCollection` and
-    /// `GCWorkScheduler::on_gc_finished` which never happen at the same time.
+    /// The time when the current GC started. Currently only used for logging.
+    /// Note that some (but not all) `GCTriggerPolicy` implementations do their own time tracking
+    /// independently for their own needs.
+    /// This field is only accessible in `ScheduleCollection` and `GCWorkScheduler::on_gc_finished`
+    /// which never happen at the same time, so `AtomicRefCell` is enough.
     pub(crate) gc_start_time: AtomicRefCell>,
     /// A counteer that keeps tracks of the number of bytes allocated by malloc
     #[cfg(feature = "malloc_counted_size")]
     pub(crate) malloc_bytes: AtomicUsize,
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index c81ea280ed..1c44e8c736 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -22,7 +22,7 @@ pub struct GCWorkScheduler {
     pub work_buckets: EnumMap>,
     /// Workers
     pub(crate) worker_group: Arc>,
-    /// Condition Variable for worker synchronization
+    /// Monitor for worker synchronization, including a mutex and condition variables.
     pub(crate) worker_monitor: Arc,
     /// How to assign the affinity of each GC thread. Specified by the user.
     affinity: AffinityKind,
@@ -48,14 +48,16 @@ impl GCWorkScheduler {
 
         // Set the open condition of each bucket.
         {
-            // Unconstrained is always open. Prepare will be opened at the beginning of a GC.
-            // This vec will grow for each stage we call with open_next()
             let first_stw_stage = WorkBucketStage::first_stw_stage();
             let mut open_stages: Vec = vec![first_stw_stage];
-            // The rest will open after the previous stage is done.
             let stages = (0..WorkBucketStage::LENGTH).map(WorkBucketStage::from_usize);
             for stage in stages {
+                // Unconstrained is always open.
+                // The first STW stage (Prepare) will be opened when the world has stopped
+                // (i.e. when all mutators are suspended).
if stage != WorkBucketStage::Unconstrained && stage != first_stw_stage {
+                    // Other work buckets will be opened after previous stages are done
+                    // (i.e. their buckets are drained and all workers have parked).
                     let cur_stages = open_stages.clone();
                     work_buckets[stage].set_open_condition(
                         move |scheduler: &GCWorkScheduler| {
@@ -278,7 +280,7 @@ impl GCWorkScheduler {
     }
 
     /// Check if all the work buckets are empty
-    pub(crate) fn assert_all_activated_buckets_are_empty(&self, worker: &GCWorker) {
+    pub(crate) fn assert_all_activated_buckets_are_empty(&self) {
         let mut error_example = None;
         for (id, bucket) in self.work_buckets.iter() {
             if bucket.is_activated() && !bucket.is_empty() {
@@ -288,16 +290,6 @@ impl GCWorkScheduler {
                 // we should show at least one abnormal bucket in the panic message
                 // so that we still have some information for debugging.
                 error_example = Some(id);
-
-                while !bucket.is_empty() {
-                    match bucket.poll(&worker.local_work_buffer) {
-                        Steal::Success(w) => {
-                            error!("  Bucket {:?} has {:?}", id, w.get_type_name());
-                        },
-                        Steal::Retry => continue,
-                        _ => {}
-                    }
-                }
             }
         }
         if let Some(id) = error_example {
@@ -377,10 +369,10 @@ impl GCWorkScheduler {
             // Test whether we are doing GC.
             if worker.mmtk.gc_in_progress() {
                 // We are in the middle of GC, and the last GC worker parked.
-                trace!("GC is scheduled. Try to find more work to do...");
+                trace!("The last worker parked during GC. Try to find more work to do...");
 
                 // During GC, if all workers parked, all open buckets must have been drained.
-                self.assert_all_activated_buckets_are_empty(worker);
+                self.assert_all_activated_buckets_are_empty();
 
                 // Find more work for workers to do.
                 let found_more_work = self.find_more_work_for_workers();
@@ -397,16 +389,16 @@ impl GCWorkScheduler {
                 }
             }
         } else {
-            trace!("GC is not scheduled. Wait for the first GC.");
-            // GC is not scheduled. Do nothing.
-            // Note that when GC worker threads has just been created, they will try to get
-            // work packets to execute. But since the first GC has not started, yet, there
-            // is not any work packets to execute, yet. Therefore, all workers will park,
-            // and the last parked worker will reach here. In that case, we should simply
-            // let workers wait until the first GC starts, instead of trying to open more
-            // buckets.
-            // If a GC worker spuriously wakes up when GC is not scheduled, it should not
-            // do anything, either.
+            trace!("The last worker parked while not in GC. Wait for GC to start.");
+            // GC has not started, yet. Do not try to open work buckets.
+            //
+            // This branch is usually reached when `initialize_collection` has just been
+            // called and GC worker threads have just been created. In that case, there are
+            // no work packets to execute, and workers should park and wait for the first
+            // GC.
+            //
+            // This branch can also be reached if a GC worker spuriously wakes up while not
+            // in GC.
             LastParkedResult::ParkSelf
         }
     });
@@ -439,7 +431,7 @@ impl GCWorkScheduler {
     /// Called when GC has finished, i.e. when all work packets have been executed.
     /// Return `true` if it scheduled the next GC immediately.
     fn on_gc_finished(&self, worker: &GCWorker) -> bool {
-        // All GC workers (except this one) must have parked by now.
+        // All GC workers must have parked by now.
         debug_assert!(!self.worker_group.has_designated_work());
         debug_assert!(self.all_buckets_empty());
@@ -466,6 +458,9 @@ impl GCWorkScheduler {
             elapsed.as_millis()
         );
 
+        // USDT tracepoint for the end of GC.
+        probe!(mmtk, gc_end);
+
         #[cfg(feature = "count_live_bytes_in_gc")]
         {
             let live_bytes = mmtk.state.get_live_bytes_in_last_gc();
@@ -501,9 +496,6 @@ impl GCWorkScheduler {
         mmtk.set_gc_status(GcStatus::NotInGC);
         ::VMCollection::resume_mutators(worker.tls);
 
-        // GC officially ends here.
-        probe!(mmtk, gc_end);
-
         // Notify the `GCRequester` that GC has finished.
         let should_schedule_gc_now = mmtk.gc_requester.on_gc_finished();
         if should_schedule_gc_now {
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index bd5367f987..754d50b0a3 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -84,7 +84,7 @@ pub(crate) enum LastParkedResult {
 
 /// A data structure for parking and waking up workers.
 /// It keeps track of the number of workers parked, and allow the last parked worker to perform
-/// operations that require all other workers to be parked.
+/// operations that can only be performed when all workers have parked.
 pub(crate) struct WorkerMonitor {
     /// The synchronized part.
     sync: Mutex,
@@ -118,6 +118,8 @@ impl WorkerMonitor {
         self.notify_work_available_inner(all, &mut guard);
     }
 
+    /// Like `notify_work_available` but the current thread must have already acquired the
+    /// mutex of `WorkerMonitorSync`.
     fn notify_work_available_inner(&self, all: bool, _guard: &mut MutexGuard) {
         if all {
             self.work_available.notify_all();
@@ -126,17 +128,19 @@ impl WorkerMonitor {
         }
     }
 
-    /// Park a worker until more work is available.
+    /// Park a worker until work packets are available.
     /// If it is the last worker parked, `on_last_parked` will be called.
-    /// If `on_last_parked` returns `true`, this function will notify other workers about available
-    /// work before unparking the current worker;
-    /// if `on_last_parked` returns `false`, the current worker will also wait for work packets to
-    /// be added.
+    /// The return value of `on_last_parked` will determine whether this worker will block and
+    /// wait, too, and whether other workers will be woken up.
     pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F)
     where
         VM: VMBinding,
         F: FnOnce() -> LastParkedResult,
     {
+        warn!("Sleep before acquiring self.sync...");
+        std::thread::sleep(std::time::Duration::from_millis(30));
+        warn!("Slept. Now acquire self.sync...");
+
         let mut sync = self.sync.lock().unwrap();
 
         // Park this worker
@@ -149,33 +153,54 @@ impl WorkerMonitor {
             all_parked
         );
 
+        let mut should_wait = false;
+
         if all_parked {
             debug!("Worker {} is the last worker parked.", worker.ordinal);
             let result = on_last_parked();
             match result {
                 LastParkedResult::ParkSelf => {
-                    // It didn't make more work available. Keep waiting.
-                    // This should happen when
-                    // 1. Worker threads have just been created, but GC has not started, yet, or
-                    // 2. A GC has just finished.
-                    sync = self.work_available.wait(sync).unwrap();
+                    should_wait = true;
                 }
                 LastParkedResult::WakeSelf => {
                     // Continue without waiting.
-                    // This should happen when only one work packet is made available.
                 }
                 LastParkedResult::WakeAll => {
-                    // Notify all GC workers.
                     self.notify_work_available_inner(true, &mut sync);
                 }
             }
         } else {
-            // Otherwise wait until notified.
-            // Note: The condition for this `cond.wait` is "more work is available".
-            // If this worker spuriously wakes up, then in the next loop iteration, the
-            // `poll_schedulable_work` invocation above will fail, and the worker will reach
-            // here and wait again.
-            sync = self.work_available.wait(sync).unwrap();
+            should_wait = true;
+        }
+
+        if should_wait {
+            // Note:
+            // 1. 
The condition variable `work_available` is guarded by `self.sync`. Because the
+            //    last parked worker is holding the mutex `self.sync` when executing
+            //    `on_last_parked`, no workers can unpark (even if they spuriously wake up) during
+            //    `on_last_parked` because they cannot re-acquire the mutex `self.sync`.
+            // 2. Workers may spuriously wake up and unpark when `on_last_parked` is not being
+            //    executed (including the case when the last parked worker is waiting here, too).
+            //    Spurious wake-up is safe because the actual condition for this `cond.wait` is
+            //    "more work is available". If a worker spuriously wakes up, then in the next
+            //    loop iteration, it will call `poll_schedulable_work`, and find no work packets
+            //    to execute. Then the worker will reach here again and wait.
+            // 3. Mutators may add a `ScheduleCollection` work packet via `GCRequester` to trigger
+            //    GC. It is the only case where a work packet is added by a thread that is not a
+            //    GC worker. Because the mutator must hold the mutex `self.sync` to notify GC
+            //    workers when adding a work packet to a bucket, either of the two can happen:
+            //    1. The mutator called `work_available.notify_one` after the worker has called
+            //       `wait`, in which case one worker (not necessarily the last parked worker)
+            //       will be woken up.
+            //    2. The mutator notified when the last worker has just entered this function.
+            //       In that case, ... Oh no! All workers sleep forever!
+            warn!("Now wait...");
+            if rand::random::<bool>() {
+                sync = self.work_available.wait(sync).unwrap();
+                warn!("Out from wait.");
+            } else {
+                warn!("Emulated spurious wakeup.");
+            }
         }
 
         // Unpark this worker.
diff --git a/src/util/options.rs b/src/util/options.rs
index eb6513e1c3..fb51d0fc65 100644
--- a/src/util/options.rs
+++ b/src/util/options.rs
@@ -706,9 +706,6 @@ options! {
     /// The GC plan to use.
     plan: PlanSelector [env_var: true, command_line: true] [always_valid] = PlanSelector::GenImmix,
     /// Number of GC worker threads.
-    // FIXME: Currently we create GCWorkScheduler when MMTK is created, which is usually static.
-    // To allow this as a command-line option, we need to refactor the creation fo the `MMTK` instance.
-    // See: https://github.com/mmtk/mmtk-core/issues/532
     threads: usize [env_var: true, command_line: true] [|v: &usize| *v > 0] = num_cpus::get(),
     /// Enable an optimization that only scans the part of the stack that has changed since the last GC (not supported)
     use_short_stack_scans: bool [env_var: true, command_line: true] [always_valid] = false,

From 2d52f00d5c30268ae6cfced27a1c5a214162e969 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Fri, 5 Jan 2024 03:18:22 +0800
Subject: [PATCH 08/48] Fix a case where workers wait forever

Mutators no longer trigger GC by adding ScheduleCollection directly.
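
The lost-wakeup hazard and the fix can be condensed into a freestanding
sketch. The types below are hypothetical stand-ins for `WorkerMonitor` and
`WorkerMonitorSync` (the real code also manages work buckets and polls for
packets); the point is only that the request flag lives inside the same mutex
that guards the condition variable, so a mutator's request can never slip in
between the last parked worker's check and its wait:

    use std::sync::{Condvar, Mutex};

    #[derive(Default)]
    struct MonitorSync {
        parked: usize,
        should_schedule_gc: bool, // the "one-element message queue"
    }

    #[derive(Default)]
    struct Monitor {
        sync: Mutex<MonitorSync>,
        work_available: Condvar,
    }

    impl Monitor {
        /// Mutator side: set the flag and notify, all under the mutex.
        fn request_schedule_collection(&self) {
            let mut sync = self.sync.lock().unwrap();
            sync.should_schedule_gc = true;
            self.work_available.notify_one();
        }

        /// Worker side: park when out of work. Returns true if this worker
        /// should schedule a GC instead of going to sleep.
        fn park_and_wait(&self, worker_count: usize) -> bool {
            let mut sync = self.sync.lock().unwrap();
            sync.parked += 1;
            if sync.parked == worker_count {
                // The last parked worker polls the flag while still holding
                // the mutex, so it cannot miss a pending request.
                if std::mem::take(&mut sync.should_schedule_gc) {
                    sync.parked -= 1;
                    return true; // add ScheduleCollection and continue
                }
            }
            sync = self.work_available.wait(sync).unwrap();
            sync.parked -= 1;
            false
        }
    }

    fn main() {
        let m = Monitor::default();
        m.request_schedule_collection();
        assert!(m.park_and_wait(1)); // the request is observed, not lost
    }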
---
 Cargo.toml                 |   1 -
 src/plan/gc_requester.rs   |   2 +-
 src/scheduler/scheduler.rs | 114 +++++++++++++++++++++----------------------
 src/scheduler/worker.rs    | 103 +++++++++++++++++++++++++++-----------
 4 files changed, 139 insertions(+), 81 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 3461c25835..558f820701 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -50,7 +50,6 @@ static_assertions = "1.1.0"
 strum = "0.25"
 strum_macros = "0.25"
 sysinfo = "0.29"
-rand = "0.8.5"
 
 [dev-dependencies]
 paste = "1.0.8"
diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs
index 03d54889df..234f74b423 100644
--- a/src/plan/gc_requester.rs
+++ b/src/plan/gc_requester.rs
@@ -49,7 +49,7 @@ impl GCRequester {
 
         let should_schedule_gc = self.try_schedule_collection(&mut guard);
         if should_schedule_gc {
-            self.scheduler.mutator_schedule_collection();
+            self.scheduler.request_schedule_collection();
             // Note: We do not clear `request_flag` now. It will be cleared by `clear_request`
             // after all mutators have stopped.
         }
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 1c44e8c736..151bdcc8c8 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -91,14 +91,16 @@ impl GCWorkScheduler {
         self.affinity.resolve_affinity(thread);
     }
 
-    /// Schedule collection. Called via `GCRequester`.
-    /// Because this function is called by a mutator thread, we only add a `ScheduleCollection`
-    /// work packet here so that a GC worker can wake up later and actually schedule the work for a
-    /// collection.
-    pub(crate) fn mutator_schedule_collection(&self) {
-        debug!("Adding ScheduleCollection work packet upon mutator request.");
-        // Add a ScheduleCollection work packet. It is the seed of other work packets.
-        self.work_buckets[WorkBucketStage::Unconstrained].add(ScheduleCollection);
+    /// Request a GC to be scheduled. Called by mutator via `GCRequester`.
+    pub(crate) fn request_schedule_collection(&self) {
+        debug!("A mutator is sending GC-scheduling request to workers...");
+        self.worker_monitor.request_schedule_collection();
+    }
+
+    /// Add the `ScheduleCollection` packet. Called by the last parked worker.
+    fn add_schedule_collection_packet(&self) {
+        // We are still holding the mutex `WorkerMonitor::sync`. Do not notify now.
+        self.work_buckets[WorkBucketStage::Unconstrained].add_no_notify(ScheduleCollection);
     }
 
     /// Schedule all the common work packets
@@ -363,45 +365,57 @@ impl GCWorkScheduler {
             return work;
         }
 
-        self.worker_monitor.park_and_wait(worker, || {
-            // This is the last worker parked.
-
-            // Test whether we are doing GC.
-            if worker.mmtk.gc_in_progress() {
-                // We are in the middle of GC, and the last GC worker parked.
-                trace!("The last worker parked during GC. Try to find more work to do...");
-
-                // During GC, if all workers parked, all open buckets must have been drained.
-                self.assert_all_activated_buckets_are_empty();
-
-                // Find more work for workers to do.
-                let found_more_work = self.find_more_work_for_workers();
-
-                if found_more_work {
-                    LastParkedResult::WakeAll
-                } else {
-                    // GC finished.
-                    let scheduled_next_gc = self.on_gc_finished(worker);
-                    if scheduled_next_gc {
-                        LastParkedResult::WakeSelf
-                    } else {
-                        LastParkedResult::ParkSelf
-                    }
-                }
-            } else {
-                trace!("The last worker parked while not in GC. Wait for GC to start.");
-                // GC has not started, yet. Do not try to open work buckets.
-                //
-                // This branch is usually reached when `initialize_collection` has just been
-                // called and GC worker threads have just been created. In that case, there are
-                // no work packets to execute, and workers should park and wait for the first
-                // GC.
-                //
-                // This branch can also be reached if a GC worker spuriously wakes up while not
-                // in GC.
-                LastParkedResult::ParkSelf
-            }
-        });
+        self.worker_monitor
+            .park_and_wait(worker, |should_schedule_gc| {
+                self.on_last_parked(worker, should_schedule_gc)
+            });
+    }
+
+    /// Called when the last worker parked.
+    /// `should_schedule_gc` is true if a mutator requested a GC.
+ fn on_last_parked(&self, worker: &GCWorker, should_schedule_gc: bool) -> LastParkedResult { + // Test whether this is happening during GC. + if worker.mmtk.gc_in_progress() { + assert!( + !should_schedule_gc, + "GC request sent to WorkerMonitor while GC is still in progress." + ); + + // We are in the middle of GC, and the last GC worker parked. + trace!("The last worker parked during GC. Try to find more work to do..."); + + // During GC, if all workers parked, all open buckets must have been drained. + self.assert_all_activated_buckets_are_empty(); + + // Find more work for workers to do. + let found_more_work = self.find_more_work_for_workers(); + + if found_more_work { + LastParkedResult::WakeAll + } else { + // GC finished. + let scheduled_next_gc = self.on_gc_finished(worker); + if scheduled_next_gc { + LastParkedResult::WakeSelf } else { - trace!("The last worker parked while not in GC. Wait for GC to start."); - // GC has not started, yet. Do not try to open work buckets. - // - // This branch is usually reached when `initialize_colection` has just been - // called and GC worker threads have just been created. In that case, there is - // no work packets to execute, and workers should park and wait for the first - // GC. - // - // This branch can also be reached if a GC worker spuriously wakes up while not - // in GC. LastParkedResult::ParkSelf } - }); + } + } else { + trace!( + "The last worker parked while not in GC. should_schedule_gc: {}", + should_schedule_gc + ); + + if should_schedule_gc { + // A mutator requested a GC to be scheduled. + self.add_schedule_collection_packet(); + LastParkedResult::WakeSelf + } else { + // Wait until GC is requested. + LastParkedResult::ParkSelf + } } } @@ -506,13 +520,15 @@ impl GCWorkScheduler { // still running, or // 2. It is STW GC, but after the invocation of `resume_mutators` above, one mutator // ran so fast that it triggered a GC before we called `on_gc_finished`. - // Note that we are holding the `WorkerMonitor` mutex, and cannot notify workers now. - // When this function returns, the current worker should continue to execute the newly - // added `ScheduleCollection` work packet. - debug!("GC already requested before `on_gc_finished`. Add ScheduleCollection now."); - self.work_buckets[WorkBucketStage::Unconstrained].add_no_notify(ScheduleCollection); + debug!("GC already requested before `on_gc_finished`. Schedule GC now."); + self.add_schedule_collection_packet(); true } else { + // Note that if a mutator attempts to request GC after `on_gc_finished`, it will call + // `request_schedule_collection`, but will block on the mutex `WorkerMonitor::sync` + // because the current GC worker is holding it. After the current worker calls + // `work_available.wait()`, the mutator will continue and wake up a GC worker (not + // necessarily this one) to schedule the GC. false } } diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 754d50b0a3..74351bedc1 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -98,6 +98,8 @@ pub(crate) struct WorkerMonitorSync { worker_count: usize, /// Number of parked workers. parked_workers: usize, + /// True if a mutator has requested the workers to schedule a GC. + should_schedule_gc: bool, } impl WorkerMonitor { @@ -106,12 +108,26 @@ impl WorkerMonitor { sync: Mutex::new(WorkerMonitorSync { worker_count, parked_workers: 0, + should_schedule_gc: false, }), work_available: Default::default(), } } - /// Wake up workers when more work packets are made available for workers. 
+    /// Request a GC worker to schedule the next GC.
+    /// Callable from mutator threads.
+    pub fn request_schedule_collection(&self) {
+        let mut guard = self.sync.lock().unwrap();
+        assert!(
+            !guard.should_schedule_gc,
+            "should_schedule_gc is already set"
+        );
+        guard.should_schedule_gc = true;
+        self.notify_work_available_inner(false, &mut guard);
+    }
+
+    /// Wake up workers when more work packets are made available for workers,
+    /// or a mutator has requested the GC workers to schedule a GC.
     /// This function is called when adding work packets to buckets.
     pub fn notify_work_available(&self, all: bool) {
         let mut guard = self.sync.lock().unwrap();
         self.notify_work_available_inner(all, &mut guard);
@@ -144,12 +150,12 @@ impl WorkerMonitor {
         }
     }
 
     /// Park a worker until work packets are available.
     /// If it is the last worker parked, `on_last_parked` will be called.
-    /// The return value of `on_last_parked` will determine whether this worker will block and
-    /// wait, too, and whether other workers will be woken up.
+    /// The argument is true if `sync.should_schedule_gc` is `true`,
+    /// and `sync.should_schedule_gc` will be cleared to `false` regardless of its value.
+    /// The return value of `on_last_parked` will determine whether this worker will block and
+    /// wait, too, and whether other workers will be woken up.
     pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F)
     where
         VM: VMBinding,
-        F: FnOnce() -> LastParkedResult,
+        F: FnOnce(bool) -> LastParkedResult,
     {
-        warn!("Sleep before acquiring self.sync...");
-        std::thread::sleep(std::time::Duration::from_millis(30));
-        warn!("Slept. Now acquire self.sync...");
-
         let mut sync = self.sync.lock().unwrap();
 
         // Park this worker
@@ -157,7 +171,8 @@ impl WorkerMonitor {
 
         if all_parked {
             debug!("Worker {} is the last worker parked.", worker.ordinal);
-            let result = on_last_parked();
+            let should_schedule_gc = std::mem::replace(&mut sync.should_schedule_gc, false);
+            let result = on_last_parked(should_schedule_gc);
             match result {
                 LastParkedResult::ParkSelf => {
                     should_wait = true;
@@ -189,33 +204,61 @@ impl WorkerMonitor {
         }
 
         if should_wait {
-            // Note:
+            // Notes on CondVar usage:
+            //
+            // Condition variables may spuriously wake up. Therefore, they are usually tested in a
+            // loop while holding a mutex
+            //
+            //      lock();
+            //      while condition() {
+            //          condvar.wait();
+            //      }
+            //      unlock();
+            //
+            // The actual condition for this `self.work_available.wait(sync)` is:
+            //
+            // 1. any work packet is available, or
+            // 2. a request for scheduling GC is submitted.
+            //
+            // But it is not used like the typical use pattern shown above, mainly because work
+            // packets can be added without holding the mutex `self.sync`. This means one worker
+            // can add a new work packet (no mutex needed) right after another worker finds no work
+            // packets are available and then parks. In other words, condition (1) can suddenly
+            // become true after a worker sees it is false but before the worker blocks waiting on
+            // the CondVar. If this happens, the last parked worker will block forever and never
+            // get notified. This may happen if mutators or the previously existing "coordinator
+            // thread" can add work packets.
+            //
+            // However, after the "coordinator thread" was removed, only GC worker threads can add
+            // work packets during GC. Parked workers (except the last parked worker) cannot make
+            // more work packets available (by adding new packets or opening buckets). 
For this
+            // reason, the **last** parked worker can be sure that after it finds no packets
+            // available, no other workers can add another work packet (because they all parked).
+            // So the **last** parked worker can open more buckets or declare GC finished.
+            //
+            // Condition (2), i.e. `sync.should_schedule_gc`, is guarded by the mutex `self.sync`.
+            // When set (by a mutator via `request_schedule_collection`), it will notify a
+            // worker; and the last parked worker always checks it before waiting. So this
+            // condition will not be set without any worker noticing.
+            //
+            // Note that generational barriers may add `ProcessModBuf` work packets when not in GC.
+            // This is benign because those work packets are not executed immediately, and are
+            // guaranteed to be executed in the next GC.
+            //
+            // Notes on spurious wake-up:
+            //
             // 1. The condition variable `work_available` is guarded by `self.sync`. Because the
             //    last parked worker is holding the mutex `self.sync` when executing
             //    `on_last_parked`, no workers can unpark (even if they spuriously wake up) during
             //    `on_last_parked` because they cannot re-acquire the mutex `self.sync`.
+            //
             // 2. Workers may spuriously wake up and unpark when `on_last_parked` is not being
             //    executed (including the case when the last parked worker is waiting here, too).
-            //    Spurious wake-up is safe because the actual condition for this `cond.wait` is
-            //    "more work is available". If a worker spuriously wakes up, then in the next
-            //    loop iteration, it will call `poll_schedulable_work`, and find no work packets
-            //    to execute. Then the worker will reach here again and wait.
-            // 3. Mutators may add a `ScheduleCollection` work packet via `GCRequester` to trigger
-            //    GC. It is the only case where a work packet is added by a thread that is not a
-            //    GC worker. Because the mutator must hold the mutex `self.sync` to notify GC
-            //    workers when adding a work packet to a bucket, either of the two can happen:
-            //    1. The mutator called `work_available.notify_one` after the worker has called
-            //       `wait`, in which case one worker (not necessarily the last parked worker)
-            //       will be woken up.
-            //    2. The mutator notified when the last worker has just entered this function.
-            //       In that case, ... Oh no! All workers sleep forever!
-            warn!("Now wait...");
-            if rand::random::<bool>() {
-                sync = self.work_available.wait(sync).unwrap();
-                warn!("Out from wait.");
-            } else {
-                warn!("Emulated spurious wakeup.");
-            }
+            //    If one or more GC workers spuriously wake up, they will check for work packets,
+            //    and park again if not available. The last parked worker will ensure the two
+            //    conditions listed above are both false before blocking. If either condition is
+            //    true, the last parked worker will take action.
+            sync = self.work_available.wait(sync).unwrap();
         }
 
         // Unpark this worker.

From fa2aa5012281ae9744be77b89cd442d4407b8b87 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Fri, 5 Jan 2024 18:04:15 +0800
Subject: [PATCH 09/48] Added comments and removed redundant code.
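
The comments added in this patch describe the `request_flag` / `gc_scheduled`
handshake. The following is a minimal sketch of that state machine under
simplified, hypothetical names (the real `GCRequester` is generic over the VM
binding and sends the message to the workers via the scheduler instead of
returning a boolean):

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Mutex;

    #[derive(Default)]
    struct RequestSync {
        gc_scheduled: bool,
    }

    #[derive(Default)]
    struct Requester {
        request_sync: Mutex<RequestSync>,
        request_flag: AtomicBool,
    }

    impl Requester {
        /// Mutator side. The fast path is one relaxed load; only the first
        /// requester takes the mutex. Returns true if the caller should send
        /// the "schedule collection" message to the workers.
        fn request(&self) -> bool {
            if self.request_flag.load(Ordering::Relaxed) {
                return false; // this GC has already been requested
            }
            let mut sync = self.request_sync.lock().unwrap();
            if self.request_flag.load(Ordering::Relaxed) {
                return false;
            }
            self.request_flag.store(true, Ordering::Relaxed);
            self.try_schedule(&mut sync)
        }

        /// Worker side: called when all mutators have stopped.
        fn clear_request(&self) {
            let _sync = self.request_sync.lock().unwrap();
            self.request_flag.store(false, Ordering::Relaxed);
        }

        /// Worker side: called when GC finishes. Returns true if a request
        /// arrived during this GC, so the next GC is scheduled immediately.
        fn on_gc_finished(&self) -> bool {
            let mut sync = self.request_sync.lock().unwrap();
            sync.gc_scheduled = false;
            self.try_schedule(&mut sync)
        }

        /// Send the message only if requested and not already scheduled.
        fn try_schedule(&self, sync: &mut RequestSync) -> bool {
            if self.request_flag.load(Ordering::Relaxed) && !sync.gc_scheduled {
                sync.gc_scheduled = true;
                true
            } else {
                false
            }
        }
    }

    fn main() {
        let r = Requester::default();
        assert!(r.request());        // first request schedules a GC
        assert!(!r.request());       // duplicates are coalesced
        r.clear_request();           // mutators stopped; GC still running
        assert!(!r.request());       // remembered, but no message during GC
        assert!(r.on_gc_finished()); // pending request schedules the next GC
    }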
--- src/global_state.rs | 19 ++++++++++++------ src/mmtk.rs | 2 ++ src/plan/gc_requester.rs | 42 +++++++++++++++++++--------------------- src/scheduler/worker.rs | 31 ++++++++++++++++------------- 4 files changed, 53 insertions(+), 41 deletions(-) diff --git a/src/global_state.rs b/src/global_state.rs index 4d57fd2e23..468efb9f37 100644 --- a/src/global_state.rs +++ b/src/global_state.rs @@ -233,13 +233,20 @@ impl Default for GlobalState { } } -/// GC status, an indicator of whether MMTk is in the progress of a GC or not. +/// GC status. /// -/// This type was only used for assertions in JikesRVM. After we removed the coordinator (a.k.a. -/// controller) thread, we use the GC status to decide whether GC workers should open new work -/// buckets when the last worker parked. -// FIXME: `GcProper` is inherited from JikesRVM, but it is not used in MMTk core. Consider -// removing it. +/// - It starts in the `NotInGC` state. +/// - It enters `GcPrepare` when GC starts (i.e. when `ScheduleCollection` is executed). +/// - It enters `GcProper` when all mutators have stopped. +/// - It returns to `NotInGC` when GC finishes. +/// +/// All states except `NotInGC` are considered "in GC". +/// +/// The status is checked by GC workers. The last parked worker only tries to open new buckets +/// during GC. +/// +/// The distinction between `GcPrepare` and `GcProper` is inherited from JikesRVM. Several +/// assertions in JikesRVM involve those two states. #[derive(PartialEq, Clone, Copy, NoUninit, Debug)] #[repr(u8)] pub enum GcStatus { diff --git a/src/mmtk.rs b/src/mmtk.rs index 9c433fa06d..44ef399083 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -344,6 +344,8 @@ impl MMTK { self.state .internal_triggered_collection .store(true, Ordering::Relaxed); + // TODO: The current `GCRequester::request()` is probably incorrect for internally triggered GC. + // Consider removing functions related to "internal triggered collection". self.gc_requester.request(); } diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs index 234f74b423..43ae8e2abd 100644 --- a/src/plan/gc_requester.rs +++ b/src/plan/gc_requester.rs @@ -1,24 +1,33 @@ use crate::scheduler::GCWorkScheduler; use crate::vm::VMBinding; -use std::marker::PhantomData; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex}; struct RequestSync { - /// Has the GCRequester called `GCWorkScheduler::schedule_collection` for the current request? - /// This flag exists so that once `GCRequester` called `GCWorkScheduler::schedule_collection`, - /// it cannot call it again until the GC it initiated finished. + /// Are the GC workers already aware that we requested a GC? + /// + /// Mutators call `GCRequester::request()` to trigger GC. It communicates with workers by + /// calling `GCWorkScheduler::request_schedule_collection`. Under the hood, it sets a + /// synchronized variable and notifies one worker. Conceptually, it is sending a message to GC + /// workers. + /// + /// The purpose of this variable is preventing the message above from being sent while GC + /// workers are still doing GC. This is mainly significant for concurrent GC. In the current + /// design (inherited from JikesRVM), `request_flag` is cleared when all mutators are + /// suspended, at which time GC is still in progress. If `GCRequester::request()` is called + /// after `request_flag` is cleared but before `on_gc_finished` is called, the mutator will not + /// send the message. But GC workers will check `request_flag` when GC finishes, and schedule + /// the next GC. 
gc_scheduled: bool,
 }
 
-/// This data structure lets mutators trigger GC, and may schedule collection when appropriate.
+/// This data structure lets mutators trigger GC.
 pub struct GCRequester {
     request_sync: Mutex,
     /// An atomic flag outside `RequestSync` so that mutators can check if GC has already been
     /// requested in `poll` without acquiring the mutex.
     request_flag: AtomicBool,
     scheduler: Arc>,
-    phantom: PhantomData,
 }
 
 impl GCRequester {
@@ -29,12 +38,11 @@ impl GCRequester {
             }),
             request_flag: AtomicBool::new(false),
             scheduler,
-            phantom: PhantomData,
         }
     }
 
     /// Request a GC. Called by mutators when polling (during allocation) and when handling user
-    /// GC requests (e.g. `System.gc();` in Java);
+    /// GC requests (e.g. `System.gc();` in Java).
     pub fn request(&self) {
@@ -56,12 +64,6 @@ impl GCRequester {
         }
     }
 
-    /// Returns true if GC has been scheduled.
-    pub fn is_gc_scheduled(&self) -> bool {
-        let guard = self.request_sync.lock().unwrap();
-        guard.gc_scheduled
-    }
-
     /// Clear the "GC requested" flag so that mutators can trigger the next GC.
     /// Called by a GC worker when all mutators have come to a stop.
     pub fn clear_request(&self) {
@@ -82,16 +84,12 @@ impl GCRequester {
 
     /// Decide whether we should schedule a new collection. Will transition the state of
     /// `gc_scheduled` from `false` to `true` if we should schedule a new collection.
+    ///
     /// Return `true` if the state transition happens.
     fn try_schedule_collection(&self, sync: &mut RequestSync) -> bool {
-        // The time to schedule a collection is when `request_flag` is `true` but `gc_scheduled`
-        // is `false`. `gc_scheduled` is `true` if either
-        //
-        // 1. another mutator called `request()` concurrently and scheduled a collection, or
-        // 2. a new GC is requested while the current GC is still in progress.
-        //
-        // If `gc_scheduled` is `true` when GC is requested, we do nothing now. But when the
-        // currrent GC finishes, a GC worker will call `on_gc_finished` which clears the
+        // The time to schedule a collection is when `request_flag` is `true` but `gc_scheduled` is
+        // `false`. If `gc_scheduled` is `true` when GC is requested, we do nothing now. But when
+        // the current GC finishes, a GC worker will call `on_gc_finished` which clears the
         // `gc_scheduled` flag, and checks the `request_flag` again to trigger the next GC.
         if self.request_flag.load(Ordering::Relaxed) && !sync.gc_scheduled {
             sync.gc_scheduled = true;
             true
         } else {
             false
         }
     }
 }
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index 74351bedc1..302bc9d239 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -82,13 +82,16 @@ pub(crate) enum LastParkedResult {
     WakeAll,
 }
 
-/// A data structure for parking and waking up workers.
-/// It keeps track of the number of workers parked, and allow the last parked worker to perform
-/// operations that can only be performed when all workers have parked.
+/// A data structure for synchronizing workers.
+///
+/// - It allows workers to park and unpark. It keeps track of the number of workers parked, and
+///   allows the last parked worker to perform operations that can only be performed when all
+///   workers have parked.
+/// - It allows mutators to notify workers to schedule a GC.
 pub(crate) struct WorkerMonitor {
     /// The synchronized part.
     sync: Mutex,
-    /// This is notified when new work is made available for the workers.
+    /// This is notified when new work packets are available, or a mutator has requested GC.
     work_available: Condvar,
 }
 
@@ -99,6 +102,10 @@ pub(crate) struct WorkerMonitorSync {
     /// Number of parked workers.
     parked_workers: usize,
     /// True if a mutator has requested the workers to schedule a GC.
+    ///
+    /// Consider this as a one-element message queue. The last parked worker will poll this
+    /// "queue" by reading this field and setting it to `false`. The last parked worker will
+    /// schedule a GC whenever seeing `true` in this field.
     should_schedule_gc: bool,
 }
 
@@ -128,7 +135,6 @@ impl WorkerMonitor {
 
     /// Wake up workers when more work packets are made available for workers,
     /// or a mutator has requested the GC workers to schedule a GC.
-    /// This function is called when adding work packets to buckets.
     pub fn notify_work_available(&self, all: bool) {
         let mut guard = self.sync.lock().unwrap();
         self.notify_work_available_inner(all, &mut guard);
@@ -144,12 +150,12 @@ impl WorkerMonitor {
         }
     }
 
-    /// Park a worker until work packets are available.
+    /// Park a worker and wait on the CondVar `work_available`.
+    ///
     /// If it is the last worker parked, `on_last_parked` will be called.
-    /// The argument is true if `sync.should_schedule_gc` is `true`,
-    /// and `sync.should_schedule_gc` will be cleared to `false` regardless of its value.
-    /// The return value of `on_last_parked` will determine whether this worker will block and
-    /// wait, too, and whether other workers will be woken up.
+    /// The argument of `on_last_parked` is true if `sync.should_schedule_gc` is `true`.
+    /// The return value of `on_last_parked` will determine whether this worker and other workers
+    /// will wake up or block waiting.
     pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F)
     where
         VM: VMBinding,
@@ -191,8 +197,7 @@ impl WorkerMonitor {
         if should_wait {
             // Notes on CondVar usage:
             //
-            // Condition variables may spuriously wake up. Therefore, they are usually tested in a
-            // loop while holding a mutex
+            // Condition variables are usually tested in a loop while holding a mutex
             //
             //      lock();
             //      while condition() {
             //          condvar.wait();
             //      }
             //      unlock();
@@ -229,7 +234,7 @@ impl WorkerMonitor {
             // Note that generational barriers may add `ProcessModBuf` work packets when not in GC.
             // This is benign because those work packets are not executed immediately, and are
             // guaranteed to be executed in the next GC.
-            //
+
             // Notes on spurious wake-up:

From 5bda9ee3362887a2b0499eabb347191f5d887cb6 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Thu, 11 Jan 2024 16:22:20 +0800
Subject: [PATCH 10/48] Move the gc_start trace point.

---
 src/scheduler/gc_work.rs   | 3 ---
 src/scheduler/scheduler.rs | 4 ++++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs
index 2be49a06eb..a651437911 100644
--- a/src/scheduler/gc_work.rs
+++ b/src/scheduler/gc_work.rs
@@ -14,9 +14,6 @@ pub struct ScheduleCollection;
 
 impl GCWork for ScheduleCollection {
     fn do_work(&mut self, worker: &mut GCWorker, mmtk: &'static MMTK) {
-        // This is officially when a GC starts.
-        probe!(mmtk, gc_start);
-
         // Record the time when GC starts.
         {
             let mut guard = mmtk.state.gc_start_time.borrow_mut();
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 151bdcc8c8..927dd8fbd0 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -99,6 +99,10 @@ impl GCWorkScheduler {
 
     /// Add the `ScheduleCollection` packet. 
Called by the last parked worker. fn add_schedule_collection_packet(&self) { + // We set the eBPF trace point here so that bpftrace scripts can start recording work + // packet events before the `ScheduleCollection` work packet starts. + probe!(mmtk, gc_start); + // We are still holding the mutex `WorkerMonitor::sync`. Do not notify now. self.work_buckets[WorkBucketStage::Unconstrained].add_no_notify(ScheduleCollection); } From 72e6eb93f0c119c7fe44dbf1b148921e32c3219c Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 12 Jan 2024 14:27:57 +0800 Subject: [PATCH 11/48] Remove the EndOfGC work packet. --- src/scheduler/gc_work.rs | 51 ---------------------------------------- 1 file changed, 51 deletions(-) diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs index a651437911..1084cffc57 100644 --- a/src/scheduler/gc_work.rs +++ b/src/scheduler/gc_work.rs @@ -222,57 +222,6 @@ impl GCWork for StopMutators { } } -#[derive(Default)] -pub struct EndOfGC { - pub elapsed: std::time::Duration, -} - -impl GCWork for EndOfGC { - fn do_work(&mut self, worker: &mut GCWorker, mmtk: &'static MMTK) { - info!( - "End of GC ({}/{} pages, took {} ms)", - mmtk.get_plan().get_reserved_pages(), - mmtk.get_plan().get_total_pages(), - self.elapsed.as_millis() - ); - - #[cfg(feature = "count_live_bytes_in_gc")] - { - let live_bytes = mmtk.state.get_live_bytes_in_last_gc(); - let used_bytes = - mmtk.get_plan().get_used_pages() << crate::util::constants::LOG_BYTES_IN_PAGE; - debug_assert!( - live_bytes <= used_bytes, - "Live bytes of all live objects ({} bytes) is larger than used pages ({} bytes), something is wrong.", - live_bytes, used_bytes - ); - info!( - "Live objects = {} bytes ({:04.1}% of {} used pages)", - live_bytes, - live_bytes as f64 * 100.0 / used_bytes as f64, - mmtk.get_plan().get_used_pages() - ); - } - - // We assume this is the only running work packet that accesses plan at the point of execution - let plan_mut: &mut dyn Plan = unsafe { mmtk.get_plan_mut() }; - plan_mut.end_of_gc(worker.tls); - - #[cfg(feature = "extreme_assertions")] - if crate::util::edge_logger::should_check_duplicate_edges(mmtk.get_plan()) { - // reset the logging info at the end of each GC - mmtk.edge_logger.reset(); - } - - // Reset the triggering information. - mmtk.state.reset_collection_trigger(); - - // Set to NotInGC after everything, and right before resuming mutators. - mmtk.set_gc_status(GcStatus::NotInGC); - ::VMCollection::resume_mutators(worker.tls); - } -} - /// This implements `ObjectTracer` by forwarding the `trace_object` calls to the wrapped /// `ProcessEdgesWork` instance. pub(crate) struct ProcessEdgesWorkTracer { From 742c3a22ec572bc99bc4c8bdcddacd8b8b5283fe Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 12 Jan 2024 18:55:56 +0800 Subject: [PATCH 12/48] WIP: Refactor to support forking --- src/global_state.rs | 9 --- src/plan/gc_requester.rs | 68 +----------------- src/scheduler/gc_work.rs | 10 --- src/scheduler/scheduler.rs | 139 ++++++++++++++++++------------------- src/scheduler/worker.rs | 66 ++++++++++++------ 5 files changed, 115 insertions(+), 177 deletions(-) diff --git a/src/global_state.rs b/src/global_state.rs index 468efb9f37..1c7b7ee627 100644 --- a/src/global_state.rs +++ b/src/global_state.rs @@ -1,8 +1,6 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; -use std::time::Instant; use atomic::Atomic; -use atomic_refcell::AtomicRefCell; use bytemuck::NoUninit; /// This stores some global states for an MMTK instance. 
@@ -44,12 +42,6 @@ pub struct GlobalState {
     pub(crate) stacks_prepared: AtomicBool,
     /// A counter that keeps tracks of the number of bytes allocated since last stress test
     pub(crate) allocation_bytes: AtomicUsize,
-    /// The time when the current GC started. Currently only used for logging.
-    /// Note that some (but not all) `GCTriggerPolicy` implementations do their own time tracking
-    /// independently for their own needs.
-    /// This field is only accessible in `ScheduleCollection` and `GCWorkScheduler::on_gc_finished`
-    /// which never happen at the same time, so `AtomicRefCell` is enough.
-    pub(crate) gc_start_time: AtomicRefCell>,
     /// A counteer that keeps tracks of the number of bytes allocated by malloc
     #[cfg(feature = "malloc_counted_size")]
     pub(crate) malloc_bytes: AtomicUsize,
@@ -224,7 +216,6 @@ impl Default for GlobalState {
             cur_collection_attempts: AtomicUsize::new(0),
             scanned_stacks: AtomicUsize::new(0),
             allocation_bytes: AtomicUsize::new(0),
-            gc_start_time: AtomicRefCell::new(None),
             #[cfg(feature = "malloc_counted_size")]
             malloc_bytes: AtomicUsize::new(0),
             #[cfg(feature = "count_live_bytes_in_gc")]
diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs
index 43ae8e2abd..ff2aa7e0b8 100644
--- a/src/plan/gc_requester.rs
+++ b/src/plan/gc_requester.rs
@@ -1,29 +1,10 @@
 use crate::scheduler::GCWorkScheduler;
 use crate::vm::VMBinding;
 use std::sync::atomic::{AtomicBool, Ordering};
-use std::sync::{Arc, Mutex};
-
-struct RequestSync {
-    /// Are the GC workers already aware that we requested a GC?
-    ///
-    /// Mutators call `GCRequester::request()` to trigger GC. It communicates with workers by
-    /// calling `GCWorkScheduler::request_schedule_collection`. Under the hood, it sets a
-    /// synchronized variable and notifies one worker. Conceptually, it is sending a message to GC
-    /// workers.
-    ///
-    /// The purpose of this variable is preventing the message above from being sent while GC
-    /// workers are still doing GC. This is mainly significant for concurrent GC. In the current
-    /// design (inherited from JikesRVM), `request_flag` is cleared when all mutators are
-    /// suspended, at which time GC is still in progress. If `GCRequester::request()` is called
-    /// after `request_flag` is cleared but before `on_gc_finished` is called, the mutator will not
-    /// send the message. But GC workers will check `request_flag` when GC finishes, and schedule
-    /// the next GC.
-    gc_scheduled: bool,
-}
+use std::sync::Arc;
 
 /// This data structure lets mutators trigger GC.
 pub struct GCRequester {
-    request_sync: Mutex,
     /// An atomic flag outside `RequestSync` so that mutators can check if GC has already been
     /// requested in `poll` without acquiring the mutex.
     request_flag: AtomicBool,
     scheduler: Arc>,
 }
 
 impl GCRequester {
     pub fn new(scheduler: Arc>) -> Self {
         GCRequester {
-            request_sync: Mutex::new(RequestSync {
-                gc_scheduled: false,
-            }),
             request_flag: AtomicBool::new(false),
             scheduler,
         }
     }
 
     /// Request a GC. Called by mutators when polling (during allocation) and when handling user
     /// GC requests (e.g. `System.gc();` in Java).
     pub fn request(&self) {
-        // Note: This is the double-checked locking algorithm.
-        // The load has the `Relaxed` order instead of `Acquire` because we are not doing lazy
-        // initialization here. We are only using this flag to remove successive requests. 
if self.request_flag.load(Ordering::Relaxed) {
             return;
         }
 
-        let mut guard = self.request_sync.lock().unwrap();
-        if !self.request_flag.load(Ordering::Relaxed) {
-            self.request_flag.store(true, Ordering::Relaxed);
-
-            let should_schedule_gc = self.try_schedule_collection(&mut guard);
-            if should_schedule_gc {
-                self.scheduler.request_schedule_collection();
-                // Note: We do not clear `request_flag` now. It will be cleared by `clear_request`
-                // after all mutators have stopped.
-            }
+        if !self.request_flag.swap(true, Ordering::Relaxed) {
+            self.scheduler.request_schedule_collection();
         }
     }
 
     /// Clear the "GC requested" flag so that mutators can trigger the next GC.
     /// Called by a GC worker when all mutators have come to a stop.
     pub fn clear_request(&self) {
-        let _guard = self.request_sync.lock().unwrap();
         self.request_flag.store(false, Ordering::Relaxed);
     }
-
-    /// Called by a GC worker when a GC has finished.
-    /// This will check the `request_flag` again and check if we should immediately schedule the
-    /// next GC. If we should, `gc_scheduled` will be set back to `true` and this function will
-    /// return `true`.
-    pub fn on_gc_finished(&self) -> bool {
-        let mut guard = self.request_sync.lock().unwrap();
-        guard.gc_scheduled = false;
-
-        self.try_schedule_collection(&mut guard)
-    }
-
-    /// Decide whether we should schedule a new collection. Will transition the state of
-    /// `gc_scheduled` from `false` to `true` if we should schedule a new collection.
-    ///
-    /// Return `true` if the state transition happens.
-    fn try_schedule_collection(&self, sync: &mut RequestSync) -> bool {
-        // The time to schedule a collection is when `request_flag` is `true` but `gc_scheduled` is
-        // `false`. If `gc_scheduled` is `true` when GC is requested, we do nothing now. But when
-        // the current GC finishes, a GC worker will call `on_gc_finished` which clears the
-        // `gc_scheduled` flag, and checks the `request_flag` again to trigger the next GC.
-        if self.request_flag.load(Ordering::Relaxed) && !sync.gc_scheduled {
-            sync.gc_scheduled = true;
-            true
-        } else {
-            false
-        }
-    }
 }
diff --git a/src/scheduler/gc_work.rs b/src/scheduler/gc_work.rs
index 1084cffc57..8563693cdc 100644
--- a/src/scheduler/gc_work.rs
+++ b/src/scheduler/gc_work.rs
@@ -14,16 +14,6 @@ pub struct ScheduleCollection;
 
 impl GCWork for ScheduleCollection {
     fn do_work(&mut self, worker: &mut GCWorker, mmtk: &'static MMTK) {
-        // Record the time when GC starts.
-        {
-            let mut guard = mmtk.state.gc_start_time.borrow_mut();
-            let old_time = guard.replace(std::time::Instant::now());
-            debug_assert!(
-                old_time.is_none(),
-                "gc_start_time is still set when GC started: {old_time:?}",
-            );
-        }
-
         // Tell GC trigger that GC started. 
mmtk.gc_trigger.policy.on_gc_start(mmtk); diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 927dd8fbd0..46bd5c4d43 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -1,11 +1,11 @@ use super::gc_work::ScheduleCollection; use super::stat::SchedulerStat; use super::work_bucket::*; -use super::worker::{GCWorker, ThreadId, WorkerGroup, WorkerMonitor}; +use super::worker::{GCWorker, ThreadId, WorkerGoals, WorkerGroup, WorkerMonitor}; use super::*; use crate::global_state::GcStatus; use crate::mmtk::MMTK; -use crate::scheduler::worker::LastParkedResult; +use crate::scheduler::worker::{LastParkedResult, WorkerGoal}; use crate::util::opaque_pointer::*; use crate::util::options::AffinityKind; use crate::util::rust_util::array_from_fn; @@ -16,6 +16,7 @@ use crossbeam::deque::Steal; use enum_map::{Enum, EnumMap}; use std::collections::HashMap; use std::sync::Arc; +use std::time::Instant; pub struct GCWorkScheduler { /// Work buckets @@ -99,10 +100,6 @@ impl GCWorkScheduler { /// Add the `ScheduleCollection` packet. Called by the last parked worker. fn add_schedule_collection_packet(&self) { - // We set the eBPF trace point here so that bpftrace scripts can start recording work - // packet events before the `ScheduleCollection` work packet starts. - probe!(mmtk, gc_start); - // We are still holding the mutex `WorkerMonitor::sync`. Do not notify now. self.work_buckets[WorkBucketStage::Unconstrained].add_no_notify(ScheduleCollection); } @@ -370,59 +367,85 @@ impl GCWorkScheduler { } self.worker_monitor - .park_and_wait(worker, |should_schedule_gc| { - self.on_last_parked(worker, should_schedule_gc) - }); + .park_and_wait(worker, |goals| self.on_last_parked(worker, goals)); } } /// Called when the last worker parked. /// `should_schedule_gc` is true if a mutator requested a GC. - fn on_last_parked(&self, worker: &GCWorker, should_schedule_gc: bool) -> LastParkedResult { - // Test whether this is happening during GC. - if worker.mmtk.gc_in_progress() { - assert!( - !should_schedule_gc, - "GC request sent to WorkerMonitor while GC is still in progress." - ); + fn on_last_parked(&self, worker: &GCWorker, goals: &mut WorkerGoals) -> LastParkedResult { + let Some(ref current_goal) = goals.current else { + // There is no goal. Find a request to respond to. + return self.respond_to_requests(goals); + }; + + match current_goal { + worker::WorkerGoal::Gc { start_time } => { + // We are in the progress of GC. - // We are in the middle of GC, and the last GC worker parked. - trace!("The last worker parked during GC. Try to find more work to do..."); + // In stop-the-world GC, mutators cannot request for GC while GC is in progress. + // When we support concurrent GC, we should remove this assertion. + assert!( + !goals.requests.gc, + "GC request sent to WorkerMonitor while GC is still in progress." + ); - // During GC, if all workers parked, all open buckets must have been drained. - self.assert_all_activated_buckets_are_empty(); + // We are in the middle of GC, and the last GC worker parked. + trace!("The last worker parked during GC. Try to find more work to do..."); - // Find more work for workers to do. - let found_more_work = self.find_more_work_for_workers(); + // During GC, if all workers parked, all open buckets must have been drained. + self.assert_all_activated_buckets_are_empty(); - if found_more_work { - LastParkedResult::WakeAll - } else { - // GC finished. 
-                let scheduled_next_gc = self.on_gc_finished(worker);
-                if scheduled_next_gc {
-                    LastParkedResult::WakeSelf
+                // Find more work for workers to do.
+                let found_more_work = self.find_more_work_for_workers();
+
+                if found_more_work {
+                    LastParkedResult::WakeAll
                 } else {
-                    LastParkedResult::ParkSelf
+                    // GC finished.
+                    self.on_gc_finished(worker, start_time);
+
+                    // Clear the current goal
+                    goals.current = None;
+                    self.respond_to_requests(goals)
                 }
             }
-        } else {
-            trace!(
-                "The last worker parked while not in GC. should_schedule_gc: {}",
-                should_schedule_gc
-            );
-
-            if should_schedule_gc {
-                // A mutator requested a GC to be scheduled.
-                self.add_schedule_collection_packet();
-                LastParkedResult::WakeSelf
-            } else {
-                // Wait until GC is requested.
-                LastParkedResult::ParkSelf
+            worker::WorkerGoal::StopForFork => {
+                // A worker parked again when it is asked to exit.
+                unimplemented!()
             }
         }
     }
 
+    /// Respond to a worker request.
+    fn respond_to_requests(&self, goals: &mut WorkerGoals) -> LastParkedResult {
+        assert!(goals.current.is_none());
+
+        if goals.requests.gc {
+            // A mutator requested a GC to be scheduled.
+            goals.requests.gc = false;
+
+            // We set the eBPF trace point here so that bpftrace scripts can start recording work
+            // packet events before the `ScheduleCollection` work packet starts.
+            probe!(mmtk, gc_start);
+
+            goals.current = Some(WorkerGoal::Gc {
+                start_time: Instant::now(),
+            });
+
+            self.add_schedule_collection_packet();
+            return LastParkedResult::WakeSelf;
+        }
+
+        if goals.requests.stop_for_fork {
+            // The VM wants to fork. GC threads should exit.
+            unimplemented!()
+        }
+
+        // No requests. Park this worker, too.
+        return LastParkedResult::ParkSelf;
+    }
+
     /// Find more work for workers to do. Return true if more work is available.
     fn find_more_work_for_workers(&self) -> bool {
         if self.worker_group.has_designated_work() {
@@ -448,7 +471,7 @@ impl GCWorkScheduler {
 
     /// Called when GC has finished, i.e. when all work packets have been executed.
-    /// Return `true` if it scheduled the next GC immediately.
-    fn on_gc_finished(&self, worker: &GCWorker) -> bool {
+    fn on_gc_finished(&self, worker: &GCWorker, start_time: &Instant) {
         // All GC workers must have parked by now.
         debug_assert!(!self.worker_group.has_designated_work());
         debug_assert!(self.all_buckets_empty());
@@ -463,11 +486,7 @@ impl GCWorkScheduler {
         mmtk.gc_trigger.policy.on_gc_end(mmtk);
 
         // Compute the elapsed time of the GC.
-        let gc_start = {
-            let mut guard = mmtk.state.gc_start_time.borrow_mut();
-            guard.take().expect("gc_start_time was not set")
-        };
-        let elapsed = gc_start.elapsed();
+        let elapsed = start_time.elapsed();
 
         info!(
             "End of GC ({}/{} pages, took {} ms)",
@@ -513,28 +532,6 @@ impl GCWorkScheduler {
         // Set to NotInGC after everything, and right before resuming mutators.
         mmtk.set_gc_status(GcStatus::NotInGC);
         ::VMCollection::resume_mutators(worker.tls);
-
-        // Notify the `GCRequester` that GC has finished.
-        let should_schedule_gc_now = mmtk.gc_requester.on_gc_finished();
-        if should_schedule_gc_now {
-            // We should schedule the next GC immediately. This means GC was triggered between
-            // `clear_request` (when stacks were scanned) and `on_gc_finished` (right above). This
-            // can happen if
-            // 1. It is concurrent GC, and a mutator triggered another GC while the current GC was
-            //    still running, or
-            // 2. It is STW GC, but after the invocation of `resume_mutators` above, one mutator
-            //    ran so fast that it triggered a GC before we called `on_gc_finished`. 
- debug!("GC already requested before `on_gc_finished`. Schedule GC now."); - self.add_schedule_collection_packet(); - true - } else { - // Note that if a mutator attempts to request GC after `on_gc_finished`, it will call - // `request_schedule_collection`, but will block on the mutex `WorkerMonitor::sync` - // because the current GC worker is holding it. After the current worker calls - // `work_available.wait()`, the mutator will continue and wake up a GC worker (not - // necessarily this one) to schedule the GC. - false - } } pub fn enable_stat(&self) { diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 98806c561f..10e061fb89 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -91,8 +91,9 @@ pub(crate) enum LastParkedResult { pub(crate) struct WorkerMonitor { /// The synchronized part. sync: Mutex, - /// This is notified when new work packets are available, or a mutator has requested GC. - work_available: Condvar, + /// Notified if workers have anything to do. That include any work packets available, and any + /// field in `sync.goals.requests` set to true. + have_anything_to_do: Condvar, } /// The synchronized part of `WorkerMonitor`. @@ -101,12 +102,36 @@ pub(crate) struct WorkerMonitorSync { worker_count: usize, /// Number of parked workers. parked_workers: usize, - /// True if a mutator has requested the workers to schedule a GC. - /// - /// Consider this as a one-element message queue. The last parked worker will poll this - /// "queue" by reading this field and setting it to `false`. The last parked worker will - /// schedule a GC whenever seeing `true` in this field. - should_schedule_gc: bool, + /// Current and requested goals. + goals: WorkerGoals, +} + +#[derive(Default)] +pub(crate) struct WorkerGoals { + /// What are the workers doing now? + pub(crate) current: Option, + /// Requests received from mutators. + pub(crate) requests: WorkerRequests, +} + +/// The thing workers are currently doing. This affects several things, such as what the last +/// parked worker will do, and whether workers will stop themselves. +pub(crate) enum WorkerGoal { + Gc { start_time: std::time::Instant }, + StopForFork, +} + +/// Reqeusts received from mutators. Workers respond to those requests when they do not have a +/// current goal. Multiple things can be requested at the same time, and workers respond to the +/// thing with the highest priority. +/// +/// The fields of this structs are ordered with decreasing priority. +#[derive(Default)] // All fields should be false by default. +pub(crate) struct WorkerRequests { + /// The VM needs to fork. Workers should save their contexts and exit. + pub(crate) stop_for_fork: bool, + /// GC is requested. Workers should schedule a GC. + pub(crate) gc: bool, } impl WorkerMonitor { @@ -115,9 +140,9 @@ impl WorkerMonitor { sync: Mutex::new(WorkerMonitorSync { worker_count, parked_workers: 0, - should_schedule_gc: false, + goals: Default::default(), }), - work_available: Default::default(), + have_anything_to_do: Default::default(), } } @@ -125,12 +150,10 @@ impl WorkerMonitor { /// Callable from mutator threads. 
pub fn request_schedule_collection(&self) {
         let mut guard = self.sync.lock().unwrap();
-        assert!(
-            !guard.should_schedule_gc,
-            "should_schedule_gc is already set"
-        );
-        guard.should_schedule_gc = true;
-        self.notify_work_available_inner(false, &mut guard);
+        if guard.goals.requests.gc == false {
+            guard.goals.requests.gc = true;
+            self.notify_work_available_inner(false, &mut guard);
+        }
     }
 
     /// Wake up workers when more work packets are made available for workers,
@@ -144,9 +167,9 @@ impl WorkerMonitor {
     /// mutex of `WorkerMonitorSync`.
     fn notify_work_available_inner(&self, all: bool, _guard: &mut MutexGuard) {
         if all {
-            self.work_available.notify_all();
+            self.have_anything_to_do.notify_all();
         } else {
-            self.work_available.notify_one();
+            self.have_anything_to_do.notify_one();
        }
     }
 
@@ -159,7 +182,7 @@ impl WorkerMonitor {
     pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F)
     where
         VM: VMBinding,
-        F: FnOnce(bool) -> LastParkedResult,
+        F: FnOnce(&mut WorkerGoals) -> LastParkedResult,
     {
         let mut sync = self.sync.lock().unwrap();
 
@@ -177,8 +200,7 @@ impl WorkerMonitor {
 
         if all_parked {
             debug!("Worker {} is the last worker parked.", worker.ordinal);
-            let should_schedule_gc = std::mem::replace(&mut sync.should_schedule_gc, false);
-            let result = on_last_parked(should_schedule_gc);
+            let result = on_last_parked(&mut sync.goals);
             match result {
                 LastParkedResult::ParkSelf => {
                     should_wait = true;
@@ -248,7 +270,7 @@ impl WorkerMonitor {
             // and park again if not available. The last parked worker will ensure the two
             // conditions listed above are both false before blocking. If either condition is
             // true, the last parked worker will take action.
-            sync = self.work_available.wait(sync).unwrap();
+            sync = self.have_anything_to_do.wait(sync).unwrap();
         }
 
         // Unpark this worker.

From 90cb6df46af05f0beb087667911a435858a18bf5 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Fri, 12 Jan 2024 19:28:04 +0800
Subject: [PATCH 13/48] Coding style change

---
 src/scheduler/scheduler.rs | 2 +-
 src/scheduler/worker.rs    | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 46bd5c4d43..408ef35bc6 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -443,7 +443,7 @@ impl GCWorkScheduler {
         }
 
         // No requests. Park this worker, too.
-        return LastParkedResult::ParkSelf;
+        LastParkedResult::ParkSelf
     }
 
     /// Find more work for workers to do. Return true if more work is available.
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index 10e061fb89..310edf2aa8 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -117,7 +117,10 @@ pub(crate) struct WorkerGoals {
 /// The thing workers are currently doing. This affects several things, such as what the last
 /// parked worker will do, and whether workers will stop themselves.
 pub(crate) enum WorkerGoal {
-    Gc { start_time: std::time::Instant },
+    Gc {
+        start_time: std::time::Instant,
+    },
+    #[allow(unused)] // TODO: Implement forking support later.
     StopForFork,
 }
pub fn request_schedule_collection(&self) {
         let mut guard = self.sync.lock().unwrap();
-        if guard.goals.requests.gc == false {
+        if !guard.goals.requests.gc {
             guard.goals.requests.gc = true;
             self.notify_work_available_inner(false, &mut guard);
         }

From 2a25331731ca17ea80ca9b3718db278b69e0d055 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Sat, 13 Jan 2024 15:38:38 +0800
Subject: [PATCH 14/48] Split WorkerGoals and WorkerMonitor

They are now too complex and should be put in dedicated files.
---
 src/plan/gc_requester.rs        |   7 +-
 src/scheduler/mod.rs            |   2 +
 src/scheduler/scheduler.rs      |  18 ++-
 src/scheduler/work_bucket.rs    |   2 +-
 src/scheduler/worker.rs         | 247 +------------------------------
 src/scheduler/worker_goals.rs   |  47 ++++++
 src/scheduler/worker_monitor.rs | 238 ++++++++++++++++++++++++++++++
 7 files changed, 310 insertions(+), 251 deletions(-)
 create mode 100644 src/scheduler/worker_goals.rs
 create mode 100644 src/scheduler/worker_monitor.rs

diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs
index ff2aa7e0b8..49ae9ce2c9 100644
--- a/src/plan/gc_requester.rs
+++ b/src/plan/gc_requester.rs
@@ -5,8 +5,8 @@ use std::sync::Arc;
 
 /// This data structure lets mutators trigger GC.
 pub struct GCRequester {
-    /// An atomic flag outside `RequestSync` so that mutators can check if GC has already been
-    /// requested in `poll` without acquiring the mutex.
+    /// Set by mutators to trigger GC. It is atomic so that mutators can check if GC has already
+    /// been requested efficiently in `poll` without acquiring any mutex.
     request_flag: AtomicBool,
     scheduler: Arc>,
 }
@@ -27,6 +27,9 @@ impl GCRequester {
         }
 
         if !self.request_flag.swap(true, Ordering::Relaxed) {
+            // `GCWorkScheduler::request_schedule_collection` needs to hold a mutex to communicate
+            // with GC workers, so only the first mutator that changed the `request_flag` to `true`
+            // shall do it.
             self.scheduler.request_schedule_collection();
         }
     }
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs
index 45501e9943..b4d79adeb8 100644
--- a/src/scheduler/mod.rs
+++ b/src/scheduler/mod.rs
@@ -17,6 +17,8 @@ mod work_bucket;
 pub use work_bucket::WorkBucketStage;
 
 mod worker;
+mod worker_monitor;
+mod worker_goals;
 pub(crate) use worker::current_worker_ordinal;
 pub use worker::GCWorker;
 
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 408ef35bc6..4b5d933448 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -1,11 +1,12 @@
 use super::gc_work::ScheduleCollection;
 use super::stat::SchedulerStat;
 use super::work_bucket::*;
-use super::worker::{GCWorker, ThreadId, WorkerGoals, WorkerGroup, WorkerMonitor};
+use super::worker::{GCWorker, ThreadId, WorkerGroup};
+use super::worker_goals::{WorkerGoal, WorkerGoals};
+use super::worker_monitor::{LastParkedResult, WorkerMonitor};
 use super::*;
 use crate::global_state::GcStatus;
 use crate::mmtk::MMTK;
-use crate::scheduler::worker::{LastParkedResult, WorkerGoal};
 use crate::util::opaque_pointer::*;
 use crate::util::options::AffinityKind;
 use crate::util::rust_util::array_from_fn;
@@ -95,7 +96,14 @@ impl GCWorkScheduler {
     /// Request a GC to be scheduled.  Called by mutator via `GCRequester`.
     pub(crate) fn request_schedule_collection(&self) {
         debug!("A mutator is sending GC-scheduling request to workers...");
-        self.worker_monitor.request_schedule_collection();
+        self.worker_monitor.make_request(|requests| {
+            if !requests.gc {
+                requests.gc = true;
+                true
+            } else {
+                false
+            }
+        });
     }
 
     /// Add the `ScheduleCollection` packet. 
Called by the last parked worker. @@ -380,7 +388,7 @@ impl GCWorkScheduler { }; match current_goal { - worker::WorkerGoal::Gc { start_time } => { + WorkerGoal::Gc { start_time } => { // We are in the progress of GC. // In stop-the-world GC, mutators cannot request for GC while GC is in progress. @@ -410,7 +418,7 @@ impl GCWorkScheduler { self.respond_to_requests(goals) } } - worker::WorkerGoal::StopForFork => { + WorkerGoal::StopForFork => { // A worker parked again when it is asked to exit. unimplemented!() } diff --git a/src/scheduler/work_bucket.rs b/src/scheduler/work_bucket.rs index d7076354eb..4386832b76 100644 --- a/src/scheduler/work_bucket.rs +++ b/src/scheduler/work_bucket.rs @@ -1,4 +1,4 @@ -use super::worker::WorkerMonitor; +use super::worker_monitor::WorkerMonitor; use super::*; use crate::vm::VMBinding; use crossbeam::deque::{Injector, Steal, Worker}; diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 310edf2aa8..19255c19be 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -12,7 +12,7 @@ use crossbeam::queue::ArrayQueue; #[cfg(feature = "count_live_bytes_in_gc")] use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; -use std::sync::{Arc, Condvar, Mutex, MutexGuard}; +use std::sync::{Arc, Mutex}; /// Represents the ID of a GC worker thread. pub type ThreadId = usize; @@ -33,8 +33,9 @@ pub fn current_worker_ordinal() -> ThreadId { ordinal } -/// The part shared between a GCWorker and the scheduler. -/// This structure is used for communication, e.g. adding new work packets. +/// The struct has one instance per worker, but is shared between workers via the scheduler +/// instance. This structure is used for communication between workers, e.g. adding designated +/// work packets, stealing work packets from other workers, and collecting per-worker statistics. pub struct GCWorkerShared { /// Worker-local statistics data. stat: AtomicRefCell>, @@ -71,246 +72,6 @@ impl GCWorkerShared { } } -/// The result type of the `on_last_parked` call-back in `WorkMonitor::park_and_wait`. -/// It decides how many workers should wake up after `on_last_parked`. -pub(crate) enum LastParkedResult { - /// The last parked worker should wait, too, until more work packets are added. - ParkSelf, - /// The last parked worker should unpark and find work packet to do. - WakeSelf, - /// Wake up all parked GC workers. - WakeAll, -} - -/// A data structure for synchronizing workers. -/// -/// - It allows workers to park and unpark. It keeps track of the number of workers parked, and -/// allows the last parked worker to perform operations that can only be performed when all -/// workers have parked. -/// - It allows mutators to notify workers to schedule a GC. -pub(crate) struct WorkerMonitor { - /// The synchronized part. - sync: Mutex, - /// Notified if workers have anything to do. That include any work packets available, and any - /// field in `sync.goals.requests` set to true. - have_anything_to_do: Condvar, -} - -/// The synchronized part of `WorkerMonitor`. -pub(crate) struct WorkerMonitorSync { - /// The total number of workers. - worker_count: usize, - /// Number of parked workers. - parked_workers: usize, - /// Current and requested goals. - goals: WorkerGoals, -} - -#[derive(Default)] -pub(crate) struct WorkerGoals { - /// What are the workers doing now? - pub(crate) current: Option, - /// Requests received from mutators. - pub(crate) requests: WorkerRequests, -} - -/// The thing workers are currently doing. 
This affects several things, such as what the last -/// parked worker will do, and whether workers will stop themselves. -pub(crate) enum WorkerGoal { - Gc { - start_time: std::time::Instant, - }, - #[allow(unused)] // TODO: Implement forking support later. - StopForFork, -} - -/// Reqeusts received from mutators. Workers respond to those requests when they do not have a -/// current goal. Multiple things can be requested at the same time, and workers respond to the -/// thing with the highest priority. -/// -/// The fields of this structs are ordered with decreasing priority. -#[derive(Default)] // All fields should be false by default. -pub(crate) struct WorkerRequests { - /// The VM needs to fork. Workers should save their contexts and exit. - pub(crate) stop_for_fork: bool, - /// GC is requested. Workers should schedule a GC. - pub(crate) gc: bool, -} - -impl WorkerMonitor { - pub fn new(worker_count: usize) -> Self { - Self { - sync: Mutex::new(WorkerMonitorSync { - worker_count, - parked_workers: 0, - goals: Default::default(), - }), - have_anything_to_do: Default::default(), - } - } - - /// Request a GC worker to schedule the next GC. - /// Callable from mutator threads. - pub fn request_schedule_collection(&self) { - let mut guard = self.sync.lock().unwrap(); - if !guard.goals.requests.gc { - guard.goals.requests.gc = true; - self.notify_work_available_inner(false, &mut guard); - } - } - - /// Wake up workers when more work packets are made available for workers, - /// or a mutator has requested the GC workers to schedule a GC. - pub fn notify_work_available(&self, all: bool) { - let mut guard = self.sync.lock().unwrap(); - self.notify_work_available_inner(all, &mut guard); - } - - /// Like `notify_work_available` but the current thread must have already acquired the - /// mutex of `WorkerMonitorSync`. - fn notify_work_available_inner(&self, all: bool, _guard: &mut MutexGuard) { - if all { - self.have_anything_to_do.notify_all(); - } else { - self.have_anything_to_do.notify_one(); - } - } - - /// Park a worker and wait on the CondVar `work_available`. - /// - /// If it is the last worker parked, `on_last_parked` will be called. - /// The argument of `on_last_parked` is true if `sync.gc_requested` is `true`. - /// The return value of `on_last_parked` will determine whether this worker and other workers - /// will wake up or block waiting. - pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F) - where - VM: VMBinding, - F: FnOnce(&mut WorkerGoals) -> LastParkedResult, - { - let mut sync = self.sync.lock().unwrap(); - - // Park this worker - let all_parked = sync.inc_parked_workers(); - trace!( - "Worker {} parked. parked/total: {}/{}. All parked: {}", - worker.ordinal, - sync.parked_workers, - sync.worker_count, - all_parked - ); - - let mut should_wait = false; - - if all_parked { - debug!("Worker {} is the last worker parked.", worker.ordinal); - let result = on_last_parked(&mut sync.goals); - match result { - LastParkedResult::ParkSelf => { - should_wait = true; - } - LastParkedResult::WakeSelf => { - // Continue without waiting. - } - LastParkedResult::WakeAll => { - self.notify_work_available_inner(true, &mut sync); - } - } - } else { - should_wait = true; - } - - if should_wait { - // Notes on CondVar usage: - // - // Conditional variables are usually tested in a loop while holding a mutex - // - // lock(); - // while condition() { - // condvar.wait(); - // } - // unlock(); - // - // The actual condition for this `self.work_available.wait(sync)` is: - // - // 1. 
any work packet is available, or - // 2. a request for scheduling GC is submitted. - // - // But it is not used like the typical use pattern shown above, mainly because work - // packets can be added without holding the mutex `self.sync`. This means one worker - // can add a new work packet (no mutex needed) right after another worker finds no work - // packets are available and then park. In other words, condition (1) can suddenly - // become true after a worker sees it is false but before the worker blocks waiting on - // the CondVar. If this happens, the last parked worker will block forever and never - // get notified. This may happen if mutators or the previously existing "coordinator - // thread" can add work packets. - // - // However, after the "coordinator thread" was removed, only GC worker threads can add - // work packets during GC. Parked workers (except the last parked worker) cannot make - // more work packets availble (by adding new packets or opening buckets). For this - // reason, the **last** parked worker can be sure that after it finds no packets - // available, no other workers can add another work packet (because they all parked). - // So the **last** parked worker can open more buckets or declare GC finished. - // - // Condition (2), i.e. `sync.should_schedule_gc` is guarded by the mutator `sync`. - // When set (by a mutator via `request_schedule_collection`), it will notify a - // worker; and the last parked worker always checks it before waiting. So this - // condition will not be set without any worker noticing. - // - // Note that generational barriers may add `ProcessModBuf` work packets when not in GC. - // This is benign because those work packets are not executed immediately, and are - // guaranteed to be executed in the next GC. - - // Notes on spurious wake-up: - // - // 1. The condition variable `work_available` is guarded by `self.sync`. Because the - // last parked worker is holding the mutex `self.sync` when executing - // `on_last_parked`, no workers can unpark (even if they spuriously wake up) during - // `on_last_parked` because they cannot re-acquire the mutex `self.sync`. - // - // 2. Workers may spuriously wake up and unpark when `on_last_parked` is not being - // executed (including the case when the last parked worker is waiting here, too). - // If one or more GC workers spuriously wake up, they will check for work packets, - // and park again if not available. The last parked worker will ensure the two - // conditions listed above are both false before blocking. If either condition is - // true, the last parked worker will take action. - sync = self.have_anything_to_do.wait(sync).unwrap(); - } - - // Unpark this worker. - sync.dec_parked_workers(); - trace!( - "Worker {} unparked. parked/total: {}/{}.", - worker.ordinal, - sync.parked_workers, - sync.worker_count, - ); - } -} - -impl WorkerMonitorSync { - /// Increase the packed-workers counter. - /// Called before a worker is parked. - /// - /// Return true if all the workers are parked. - fn inc_parked_workers(&mut self) -> bool { - let old = self.parked_workers; - debug_assert!(old < self.worker_count); - let new = old + 1; - self.parked_workers = new; - new == self.worker_count - } - - /// Decrease the packed-workers counter. - /// Called after a worker is resumed from the parked state. 
-    fn dec_parked_workers(&mut self) {
-        let old = self.parked_workers;
-        debug_assert!(old <= self.worker_count);
-        debug_assert!(old > 0);
-        let new = old - 1;
-        self.parked_workers = new;
-    }
-}
-
 /// A GC worker. This part is privately owned by a worker thread.
 pub struct GCWorker {
     /// The VM-specific thread-local state of the GC thread.
diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs
new file mode 100644
index 0000000000..ae9a36763a
--- /dev/null
+++ b/src/scheduler/worker_goals.rs
@@ -0,0 +1,47 @@
+//! This module contains "goals" which are larger than work packets, and describes what workers are
+//! working towards on a high level.
+//!
+//! A "goal" is represented by a `WorkerGoal`. All workers work towards a single goal at a time.
+//! The current goal influences the behavior of GC workers, especially the last parked worker.
+//! For example,
+//!
+//! - When a GC is in progress, the last parked worker will try to open buckets or announce
+//!   that the GC has finished.
+//! - When stopping for fork, every worker should exit when woken.
+//!
+//! The struct `WorkerRequests` keeps a list of requests from mutators, such as requests for GC
+//! and requests for forking. But the GC workers will only respond to one request at a time.

+use std::time::Instant;
+
+/// The current and requested goals.
+#[derive(Default)]
+pub(crate) struct WorkerGoals {
+    /// What are the workers doing now?
+    pub(crate) current: Option,
+    /// Requests received from mutators.
+    pub(crate) requests: WorkerRequests,
+}
+
+/// The thing workers are currently doing. This affects several things, such as what the last
+/// parked worker will do, and whether workers will stop themselves.
+pub(crate) enum WorkerGoal {
+    Gc {
+        start_time: Instant,
+    },
+    #[allow(unused)] // TODO: Implement forking support later.
+    StopForFork,
+}
+
+/// Requests received from mutators. Workers respond to those requests when they do not have a
+/// current goal. Multiple things can be requested at the same time, and workers respond to the
+/// thing with the highest priority.
+///
+/// The fields of this struct are ordered by decreasing priority.
+#[derive(Default)] // All fields should be false by default.
+pub(crate) struct WorkerRequests {
+    /// The VM needs to fork. Workers should save their contexts and exit.
+    pub(crate) stop_for_fork: bool,
+    /// GC is requested. Workers should schedule a GC.
+    pub(crate) gc: bool,
+}
diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs
new file mode 100644
index 0000000000..aba804f274
--- /dev/null
+++ b/src/scheduler/worker_monitor.rs
@@ -0,0 +1,238 @@
+//! This module contains `WorkerMonitor` and related types. Its purposes include:
+//!
+//! - allowing workers to park,
+//! - letting the last parked worker take action, and
+//! - letting workers and mutators notify workers when workers are given things to do.
+
+use std::sync::{Condvar, Mutex};
+
+use crate::vm::VMBinding;
+
+use super::{
+    worker_goals::{WorkerGoals, WorkerRequests},
+    GCWorker,
+};
+
+/// The result type of the `on_last_parked` call-back in `WorkerMonitor::park_and_wait`.
+/// It decides how many workers should wake up after `on_last_parked`.
+pub(crate) enum LastParkedResult {
+    /// The last parked worker should wait, too, until more work packets are added.
+    ParkSelf,
+    /// The last parked worker should unpark and find a work packet to do.
+    WakeSelf,
+    /// Wake up all parked GC workers. 
+    WakeAll,
+}
+
+/// A data structure for synchronizing workers with each other and with mutators.
+///
+/// Unlike `GCWorkerShared`, there is only one instance of `WorkerMonitor`.
+///
+/// -   It allows workers to park and unpark.
+/// -   It allows mutators to notify workers to schedule a GC.
+pub(crate) struct WorkerMonitor {
+    /// The synchronized part.
+    sync: Mutex,
+    /// Workers wait on this when idle. Notified if workers have things to do. That includes:
+    /// -   any work packets available, and
+    /// -   any field in `sync.goals.requests` set to true.
+    workers_have_anything_to_do: Condvar,
+}
+
+/// The synchronized part of `WorkerMonitor`.
+struct WorkerMonitorSync {
+    /// Count parked workers.
+    parker: WorkerParker,
+    /// Current and requested goals.
+    goals: WorkerGoals,
+}
+
+/// This struct counts the number of workers parked and identifies the last parked worker.
+struct WorkerParker {
+    /// The total number of workers.
+    worker_count: usize,
+    /// Number of parked workers.
+    parked_workers: usize,
+}
+
+impl WorkerParker {
+    fn new(worker_count: usize) -> Self {
+        Self {
+            worker_count,
+            parked_workers: 0,
+        }
+    }
+
+    /// Increase the parked-workers counter.
+    /// Called before a worker is parked.
+    ///
+    /// Return true if all the workers are parked.
+    fn inc_parked_workers(&mut self) -> bool {
+        let old = self.parked_workers;
+        debug_assert!(old < self.worker_count);
+        let new = old + 1;
+        self.parked_workers = new;
+        new == self.worker_count
+    }
+
+    /// Decrease the parked-workers counter.
+    /// Called after a worker is resumed from the parked state.
+    fn dec_parked_workers(&mut self) {
+        let old = self.parked_workers;
+        debug_assert!(old <= self.worker_count);
+        debug_assert!(old > 0);
+        let new = old - 1;
+        self.parked_workers = new;
+    }
+}
+
+impl WorkerMonitor {
+    pub fn new(worker_count: usize) -> Self {
+        Self {
+            sync: Mutex::new(WorkerMonitorSync {
+                parker: WorkerParker::new(worker_count),
+                goals: Default::default(),
+            }),
+            workers_have_anything_to_do: Default::default(),
+        }
+    }
+
+    /// Make a request. The `callback` will be called while holding the mutex `self.sync` so that
+    /// the caller can set some of its fields to true.
+    ///
+    /// The `callback` has one parameter:
+    ///
+    /// -   a mutable reference to the `WorkerRequests` instance.
+    ///
+    /// The `callback` should return `true` if it set any field of `WorkerRequests` to true.
+    pub fn make_request(&self, callback: Callback)
+    where
+        Callback: FnOnce(&mut WorkerRequests) -> bool,
+    {
+        let mut guard = self.sync.lock().unwrap();
+        let set_any_fields = callback(&mut guard.goals.requests);
+        if set_any_fields {
+            self.notify_work_available(false);
+        }
+    }
+
+    /// Wake up workers when more work packets are made available for workers,
+    /// or a mutator has requested the GC workers to schedule a GC.
+    pub fn notify_work_available(&self, all: bool) {
+        if all {
+            self.workers_have_anything_to_do.notify_all();
+        } else {
+            self.workers_have_anything_to_do.notify_one();
+        }
+    }
+
+    /// Park a worker and wait on the CondVar `workers_have_anything_to_do`.
+    ///
+    /// If it is the last worker parked, `on_last_parked` will be called.
+    /// The argument of `on_last_parked` is the current and requested goals of the workers.
+    /// The return value of `on_last_parked` will determine whether this worker and other workers
+    /// will wake up or block waiting. 
+    pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F)
+    where
+        VM: VMBinding,
+        F: FnOnce(&mut WorkerGoals) -> LastParkedResult,
+    {
+        let mut sync = self.sync.lock().unwrap();
+
+        // Park this worker
+        let all_parked = sync.parker.inc_parked_workers();
+        trace!(
+            "Worker {} parked. parked/total: {}/{}. All parked: {}",
+            worker.ordinal,
+            sync.parker.parked_workers,
+            sync.parker.worker_count,
+            all_parked
+        );
+
+        let mut should_wait = false;
+
+        if all_parked {
+            debug!("Worker {} is the last worker parked.", worker.ordinal);
+            let result = on_last_parked(&mut sync.goals);
+            match result {
+                LastParkedResult::ParkSelf => {
+                    should_wait = true;
+                }
+                LastParkedResult::WakeSelf => {
+                    // Continue without waiting.
+                }
+                LastParkedResult::WakeAll => {
+                    self.notify_work_available(true);
+                }
+            }
+        } else {
+            should_wait = true;
+        }
+
+        if should_wait {
+            // Notes on CondVar usage:
+            //
+            // Condition variables are usually tested in a loop while holding a mutex
+            //
+            //     lock();
+            //     while condition() {
+            //         condvar.wait();
+            //     }
+            //     unlock();
+            //
+            // The actual condition for this `self.workers_have_anything_to_do.wait(sync)` is:
+            //
+            // 1. any work packet is available, or
+            // 2. a request for scheduling GC is submitted.
+            //
+            // But it is not used like the typical use pattern shown above, mainly because work
+            // packets can be added without holding the mutex `self.sync`. This means one worker
+            // can add a new work packet (no mutex needed) right after another worker finds no work
+            // packets are available and then park. In other words, condition (1) can suddenly
+            // become true after a worker sees it is false but before the worker blocks waiting on
+            // the CondVar. If this happens, the last parked worker will block forever and never
+            // get notified. This may happen if mutators or the previously existing "coordinator
+            // thread" can add work packets.
+            //
+            // However, after the "coordinator thread" was removed, only GC worker threads can add
+            // work packets during GC. Parked workers (except the last parked worker) cannot make
+            // more work packets available (by adding new packets or opening buckets). For this
+            // reason, the **last** parked worker can be sure that after it finds no packets
+            // available, no other workers can add another work packet (because they all parked).
+            // So the **last** parked worker can open more buckets or declare GC finished.
+            //
+            // Condition (2), i.e. `sync.goals.requests`, is guarded by the mutex `self.sync`.
+            // When set (by a mutator via `request_schedule_collection`), it will notify a
+            // worker; and the last parked worker always checks it before waiting. So this
+            // condition will not be set without any worker noticing.
+            //
+            // Note that generational barriers may add `ProcessModBuf` work packets when not in GC.
+            // This is benign because those work packets are not executed immediately, and are
+            // guaranteed to be executed in the next GC.
+
+            // Notes on spurious wake-up:
+            //
+            // 1. The condition variable `workers_have_anything_to_do` is guarded by `self.sync`.
+            //    Because the last parked worker is holding the mutex `self.sync` when executing
+            //    `on_last_parked`, no workers can unpark (even if they spuriously wake up) during
+            //    `on_last_parked` because they cannot re-acquire the mutex `self.sync`.
+            //
+            // 2. Workers may spuriously wake up and unpark when `on_last_parked` is not being
+            //    executed (including the case when the last parked worker is waiting here, too). 
+            //    If one or more GC workers spuriously wake up, they will check for work packets,
+            //    and park again if not available. The last parked worker will ensure the two
+            //    conditions listed above are both false before blocking. If either condition is
+            //    true, the last parked worker will take action.
+            sync = self.workers_have_anything_to_do.wait(sync).unwrap();
+        }
+
+        // Unpark this worker.
+        sync.parker.dec_parked_workers();
+        trace!(
+            "Worker {} unparked. parked/total: {}/{}.",
+            worker.ordinal,
+            sync.parker.parked_workers,
+            sync.parker.worker_count,
+        );
+    }
+}

From 1f9d9053a9ef75ac5ac69426f63f936c11836172 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Wed, 17 Jan 2024 16:35:38 +0800
Subject: [PATCH 15/48] Create worker structs lazily

---
 src/memory_manager.rs   | 12 ++++++++
 src/scheduler/worker.rs | 65 +++++++++++++++++++++++++++++++++++++----
 2 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/src/memory_manager.rs b/src/memory_manager.rs
index e5a918233f..8f0b5d9403 100644
--- a/src/memory_manager.rs
+++ b/src/memory_manager.rs
@@ -495,6 +495,18 @@ pub fn initialize_collection(mmtk: &'static MMTK, tls: VMThre
     probe!(mmtk, collection_initialized);
 }
 
+pub fn uninitialize_collection(mmtk: &'static MMTK, tls: VMThread) {
+    assert!(
+        mmtk.state.is_initialized(),
+        "MMTk collection has not been initialized, yet (was initialize_collection() called before?)"
+    );
+    //mmtk.scheduler.stop_gc_threads_for_forking(mmtk, tls);
+    mmtk.state.initialized.store(false, Ordering::SeqCst);
+    //probe!(mmtk, collection_initialized);
+    unimplemented!()
+}
+
+
 /// Allow MMTk to trigger garbage collection when heap is full. This should only be used in pair with disable_collection().
 /// See the comments on disable_collection(). If disable_collection() is not used, there is no need to call this function at all.
 /// Note this call is not thread safe, only one VM thread should call this.
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index 19255c19be..e3e707a7da 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -214,13 +214,37 @@ impl GCWorker {
     }
 }
 
+enum WorkerCreationState {
+    NotCreated {
+        /// The local work queues for to-be-created workers.
+        unspawned_local_work_queues: Vec>>>,
+    },
+    Created {
+        /// `Worker` instances for worker threads that have not been created yet, or worker thread that
+        /// are suspended (e.g. for forking).
+        suspended_workers: Vec>>,
+    },
+}
+
+impl WorkerCreationState {
+    pub fn is_created(&self) -> bool {
+        matches!(self, WorkerCreationState::Created { .. })
+    }
+}
+
 /// A worker group to manage all the GC workers.
 pub(crate) struct WorkerGroup {
     /// Shared worker data
     pub workers_shared: Vec>>,
-    unspawned_local_work_queues: Mutex>>>>,
+    /// The stateful part
+    state: Mutex>,
 }
 
+/// We have to persuade Rust that `WorkerGroup` is safe to share because it thinks one worker can
+/// refer to another worker via the path "worker -> scheduler -> worker_group -> suspended_workers
+/// -> worker" which it thinks is a cyclic reference and unsafe. 
+unsafe impl Sync for WorkerGroup {} + impl WorkerGroup { /// Create a WorkerGroup pub fn new(num_workers: usize) -> Arc { @@ -238,13 +262,21 @@ impl WorkerGroup { Arc::new(Self { workers_shared, - unspawned_local_work_queues: Mutex::new(unspawned_local_work_queues), + state: Mutex::new(WorkerCreationState::NotCreated { + unspawned_local_work_queues, + }), }) } - /// Spawn all the worker threads - pub fn spawn(&self, mmtk: &'static MMTK, tls: VMThread) { - let mut unspawned_local_work_queues = self.unspawned_local_work_queues.lock().unwrap(); + /// Create `GCWorker` instances, but do not create threads, yet. + fn create_worker_structs(&self, state: &mut WorkerCreationState, mmtk: &'static MMTK) { + let WorkerCreationState::NotCreated { unspawned_local_work_queues } = state else { + panic!("GCWorker structs have already been created"); + }; + + assert_eq!(self.workers_shared.len(), unspawned_local_work_queues.len()); + let mut suspended_workers = Vec::with_capacity(self.workers_shared.len()); + // Spawn each worker thread. for (ordinal, shared) in self.workers_shared.iter().enumerate() { let worker = Box::new(GCWorker::new( @@ -254,9 +286,30 @@ impl WorkerGroup { shared.clone(), unspawned_local_work_queues.pop().unwrap(), )); + suspended_workers.push(worker); + } + + *state = WorkerCreationState::Created { suspended_workers }; + } + + /// Spawn all the worker threads + pub fn spawn(&self, mmtk: &'static MMTK, tls: VMThread) { + let mut guard = self.state.lock().unwrap(); + + // Create worker structs lazily. If we are spawning workers after fork, worker structs + // will have been created already, so we can skip the creation. + if !guard.is_created() { + self.create_worker_structs(&mut *guard, mmtk); + } + + let WorkerCreationState::Created { ref mut suspended_workers } = *guard else { + panic!("GCWorker structs have not been created, yet."); + }; + + // Drain the queue. We transfer the ownership of each `GCWorker` instance to a GC thread. + for worker in suspended_workers.drain(..) { VM::VMCollection::spawn_gc_thread(tls, GCThreadContext::::Worker(worker)); } - debug_assert!(unspawned_local_work_queues.is_empty()); } /// Get the number of workers in the group From 029bfdae06be6d27bfdc3aa4f7a29dced6e65db3 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 18 Jan 2024 13:22:27 +0800 Subject: [PATCH 16/48] Allow the loop in GCWorker::run to break --- src/memory_manager.rs | 5 ++++- src/scheduler/scheduler.rs | 20 ++++++++++++++------ src/scheduler/worker.rs | 32 +++++++++++++++++++++++++------- src/scheduler/worker_monitor.rs | 7 +++++-- 4 files changed, 48 insertions(+), 16 deletions(-) diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 8f0b5d9403..961b7e2745 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -461,7 +461,10 @@ pub fn gc_poll(mmtk: &MMTK, tls: VMMutatorThread) { } } -/// Run the main loop of a GC worker. This method does not return. +/// Run the main loop of a GC worker. +/// +/// This method runs until the worker exits. Currently a worker may exit after +/// `uninitialize_collection` is called. /// /// Arguments: /// * `tls`: The thread that will be used as the GC worker. 
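```rust
// Aside (editor's sketch): patch 16 above changes the worker's main loop so that
// polling can fail with a "should exit" error instead of blocking forever. Below
// is a distilled sketch of that loop shape. `ShouldExit` and `worker_loop` are
// illustrative names for this note; `ShouldExit` mirrors the role of the
// `WorkerShouldExit` type introduced in the diff that follows.
struct ShouldExit;

/// Keep polling and running work packets until `poll` signals that the worker
/// should exit (e.g. because the VM wants to fork).
fn worker_loop(mut poll: impl FnMut() -> Result<Box<dyn FnOnce()>, ShouldExit>) {
    loop {
        let Ok(work) = poll() else {
            // Asked to exit: break out of the loop instead of blocking forever.
            break;
        };
        work(); // execute one work packet
    }
    // After the loop, the real code hands the worker struct back to the worker
    // group so the thread can be re-spawned later (see patch 17).
}
```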
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 4b5d933448..37873a5159 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -1,3 +1,5 @@
+use self::worker::{PollResult, WorkerShouldExit};
+
 use super::gc_work::ScheduleCollection;
 use super::stat::SchedulerStat;
 use super::work_bucket::*;
@@ -362,20 +364,26 @@ impl GCWorkScheduler {
 
     /// Called by workers to get a schedulable work packet.
     /// Park the worker if there're no available packets.
-    pub fn poll(&self, worker: &GCWorker) -> Box> {
-        self.poll_schedulable_work(worker)
-            .unwrap_or_else(|| self.poll_slow(worker))
+    pub(crate) fn poll(&self, worker: &GCWorker) -> PollResult {
+        if let Some(work) = self.poll_schedulable_work(worker) {
+            return Ok(work);
+        }
+        self.poll_slow(worker)
     }
 
-    fn poll_slow(&self, worker: &GCWorker) -> Box> {
+    fn poll_slow(&self, worker: &GCWorker) -> PollResult {
         loop {
             // Retry polling
             if let Some(work) = self.poll_schedulable_work(worker) {
-                return work;
+                return Ok(work);
             }
 
-            self.worker_monitor
+            let should_exit = self.worker_monitor
                 .park_and_wait(worker, |goals| self.on_last_parked(worker, goals));
+
+            if should_exit {
+                return Err(WorkerShouldExit);
+            }
         }
     }
 
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index e3e707a7da..52217908bf 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -107,6 +107,16 @@ impl GCWorkerShared {
     }
 }
 
+/// A special error type that indicates a worker should exit.
+/// This may happen if the VM needs to fork and asks workers to exit.
+pub(crate) struct WorkerShouldExit;
+
+/// The result type of `GCWorker::poll`.
+/// Too many functions return `Option>>`. In most cases, when `None` is
+/// returned, the caller should try getting work packets from another place. To avoid confusion,
+/// we use `Err(WorkerShouldExit)` to clearly indicate that the worker should exit immediately.
+pub(crate) type PollResult = Result>, WorkerShouldExit>;
+
 impl GCWorker {
     pub(crate) fn new(
         mmtk: &'static MMTK,
@@ -171,12 +181,16 @@ impl GCWorker {
     /// 2. Poll from the local work queue.
     /// 3. Poll from activated global work-buckets
     /// 4. Steal from other workers
-    fn poll(&self) -> Box> {
-        self.shared
-            .designated_work
-            .pop()
-            .or_else(|| self.local_work_buffer.pop())
-            .unwrap_or_else(|| self.scheduler().poll(self))
+    fn poll(&mut self) -> PollResult {
+        if let Some(work) = self.shared.designated_work.pop() {
+            return Ok(work);
+        }
+
+        if let Some(work) = self.local_work_buffer.pop() {
+            return Ok(work);
+        }
+
+        self.scheduler().poll(self)
     }
 
     /// Entry of the worker thread. Resolve thread affinity, if it has been specified by the user.
@@ -197,7 +211,10 @@ impl GCWorker {
             // If we have work_start and work_end, we cannot measure the first
             // poll.
             probe!(mmtk, work_poll);
-            let mut work = self.poll();
+            let Ok(mut work) = self.poll() else {
+                // The worker is asked to exit. Break from the loop.
+                break;
+            };
 
             // probe! 
expands to an empty block on unsupported platforms
             #[allow(unused_variables)]
             let typename = work.get_type_name();
@@ -211,6 +228,7 @@ impl GCWorker {
             probe!(mmtk, work, typename.as_ptr(), typename.len());
             work.do_work_with_stat(self, mmtk);
         }
+        probe!(mmtk, gcworker_exit);
     }
 }
 
diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs
index aba804f274..8b39dd727a 100644
--- a/src/scheduler/worker_monitor.rs
+++ b/src/scheduler/worker_monitor.rs
@@ -9,7 +9,7 @@ use std::sync::{Condvar, Mutex};
 use crate::vm::VMBinding;
 
 use super::{
-    worker_goals::{WorkerGoals, WorkerRequests},
+    worker_goals::{WorkerGoal, WorkerGoals, WorkerRequests},
     GCWorker,
 };
 
@@ -132,7 +132,7 @@ impl WorkerMonitor {
     /// The argument of `on_last_parked` is the current and requested goals of the workers.
     /// The return value of `on_last_parked` will determine whether this worker and other workers
     /// will wake up or block waiting.
-    pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F)
+    pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F) -> bool
     where
         VM: VMBinding,
         F: FnOnce(&mut WorkerGoals) -> LastParkedResult,
@@ -234,5 +234,8 @@ impl WorkerMonitor {
             sync.parker.parked_workers,
             sync.parker.worker_count,
         );
+
+        // If the current goal is `StopForFork`, return true so that the worker thread will exit.
+        matches!(sync.goals.current, Some(WorkerGoal::StopForFork))
     }
 }

From d32885901b96bbb4f18a0cf4e19970b443f4930f Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Thu, 18 Jan 2024 16:17:45 +0800
Subject: [PATCH 17/48] Stop and restart GC workers for forking

---
 src/memory_manager.rs           | 20 +++++++++++------
 src/scheduler/scheduler.rs      | 28 ++++++++++++++++++++++--
 src/scheduler/worker.rs         | 38 ++++++++++++++++++++++++++++++++-
 src/scheduler/worker_goals.rs   |  5 +++--
 src/scheduler/worker_monitor.rs |  7 ++++++
 5 files changed, 86 insertions(+), 12 deletions(-)

diff --git a/src/memory_manager.rs b/src/memory_manager.rs
index 961b7e2745..964d55270a 100644
--- a/src/memory_manager.rs
+++ b/src/memory_manager.rs
@@ -474,15 +474,18 @@ pub fn gc_poll(mmtk: &MMTK, tls: VMMutatorThread) {
 pub fn start_worker(
     mmtk: &'static MMTK,
     tls: VMWorkerThread,
-    worker: &mut GCWorker,
+    mut worker: Box>,
 ) {
     worker.run(tls, mmtk);
+    mmtk.scheduler.worker_group.surrender_gc_worker(worker);
 }
 
 /// Initialize the scheduler and GC workers that are required for doing garbage collections.
 /// This is a mandatory call for a VM during its boot process once its thread system
-/// is ready. This should only be called once. This call will invoke Collection::spawn_gc_thread()
-/// to create GC threads.
+/// is ready. This call will invoke Collection::spawn_gc_thread() to create GC threads.
+///
+/// This function can also be called after `uninitialize_collection` is called in order to re-spawn
+/// the GC threads.
 ///
 /// Arguments:
 /// * `mmtk`: A reference to an MMTk instance.
@@ -498,15 +501,18 @@ pub fn initialize_collection(mmtk: &'static MMTK, tls: VMThre
     probe!(mmtk, collection_initialized);
 }
 
-pub fn uninitialize_collection(mmtk: &'static MMTK, tls: VMThread) {
+/// Ask GC worker threads to exit. Currently, this function is for the sole purpose of forking.
+///
+/// Arguments:
+/// * `mmtk`: A reference to an MMTk instance. 
+pub fn uninitialize_collection(mmtk: &'static MMTK) { assert!( mmtk.state.is_initialized(), "MMTk collection has not been initialized, yet (was initialize_collection() called before?)" ); - //mmtk.scheduler.stop_gc_threads_for_forking(mmtk, tls); + mmtk.scheduler.stop_gc_threads_for_forking(); mmtk.state.initialized.store(false, Ordering::SeqCst); - //probe!(mmtk, collection_initialized); - unimplemented!() + probe!(mmtk, collection_uninitialized); } diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 37873a5159..ab08d6c390 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -90,6 +90,22 @@ impl GCWorkScheduler { self.worker_group.spawn(mmtk, tls) } + /// Ask all GC workers to exit for forking, and wait until all workers exited. + pub fn stop_gc_threads_for_forking(self: &Arc) { + self.worker_monitor.make_request(|requests| { + if !requests.stop_for_fork { + requests.stop_for_fork = true; + true + } else { + false + } + }); + + self.worker_group.wait_until_worker_exited(); + + self.worker_monitor.on_all_workers_exited(); + } + /// Resolve the affinity of a thread. pub fn resolve_affinity(&self, thread: ThreadId) { self.affinity.resolve_affinity(thread); @@ -101,6 +117,8 @@ impl GCWorkScheduler { self.worker_monitor.make_request(|requests| { if !requests.gc { requests.gc = true; + warn!("GC requested."); + dbg!(requests); true } else { false @@ -449,13 +467,19 @@ impl GCWorkScheduler { start_time: Instant::now(), }); + warn!("Responded to GC request."); + dbg!(goals); + self.add_schedule_collection_packet(); return LastParkedResult::WakeSelf; } if goals.requests.stop_for_fork { - // The VM wants to fork. GC threads should exit. - unimplemented!() + // A mutator wanted to fork. + goals.requests.stop_for_fork = false; + + goals.current = Some(WorkerGoal::StopForFork); + return LastParkedResult::WakeAll; } // No reqeusts. Park this worker, too. diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 52217908bf..2e65407b56 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -12,7 +12,7 @@ use crossbeam::queue::ArrayQueue; #[cfg(feature = "count_live_bytes_in_gc")] use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, Condvar, Mutex}; /// Represents the ID of a GC worker thread. pub type ThreadId = usize; @@ -197,6 +197,7 @@ impl GCWorker { /// Each worker will keep polling and executing work packets in a loop. pub fn run(&mut self, tls: VMWorkerThread, mmtk: &'static MMTK) { probe!(mmtk, gcworker_run); + warn!("Worker {} started.", self.ordinal); WORKER_ORDINAL.with(|x| x.store(self.ordinal, Ordering::SeqCst)); self.scheduler.resolve_affinity(self.ordinal); self.tls = tls; @@ -256,6 +257,9 @@ pub(crate) struct WorkerGroup { pub workers_shared: Vec>>, /// The stateful part state: Mutex>, + /// The condition of "all workers exited", i.e. the number of suspended workers is equal to the + /// number of workers. + cond_all_workers_exited: Condvar, } /// We have to persuade Rust that `WorkerGroup` is safe to share because it thinks one worker can @@ -283,6 +287,7 @@ impl WorkerGroup { state: Mutex::new(WorkerCreationState::NotCreated { unspawned_local_work_queues, }), + cond_all_workers_exited: Default::default(), }) } @@ -330,6 +335,37 @@ impl WorkerGroup { } } + /// Surrender the `GCWorker` struct when a GC worker exits. 
+    pub fn surrender_gc_worker(&self, worker: Box>) {
+        let mut state = self.state.lock().unwrap();
+        let WorkerCreationState::Created { ref mut suspended_workers } = *state else {
+            panic!("GCWorker structs have not been created, yet.");
+        };
+        let ordinal = worker.ordinal;
+        suspended_workers.push(worker);
+        info!(
+            "Worker {} surrendered. ({}/{})",
+            ordinal,
+            suspended_workers.len(),
+            self.worker_count()
+        );
+        if suspended_workers.len() == self.worker_count() {
+            info!("All {} workers surrendered.", self.worker_count());
+            self.cond_all_workers_exited.notify_all();
+        }
+    }
+
+    /// Wait until all workers have exited.
+    pub fn wait_until_worker_exited(&self) {
+        let guard = self.state.lock().unwrap();
+        let _guard = self.cond_all_workers_exited.wait_while(guard, |state| {
+            let WorkerCreationState::Created { ref suspended_workers } = *state else {
+                panic!("GCWorker structs have not been created, yet.");
+            };
+            suspended_workers.len() != self.worker_count()
+        });
+    }
+
     /// Get the number of workers in the group
     pub fn worker_count(&self) -> usize {
         self.workers_shared.len()
diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs
index ae9a36763a..0dc991f9f0 100644
--- a/src/scheduler/worker_goals.rs
+++ b/src/scheduler/worker_goals.rs
@@ -15,7 +15,7 @@ use std::time::Instant;
 
 /// The current and requested goals.
-#[derive(Default)]
+#[derive(Default, Debug)]
 pub(crate) struct WorkerGoals {
     /// What are the workers doing now?
     pub(crate) current: Option,
@@ -25,6 +25,7 @@ pub(crate) struct WorkerGoals {
 
 /// The thing workers are currently doing. This affects several things, such as what the last
 /// parked worker will do, and whether workers will stop themselves.
+#[derive(Debug)]
 pub(crate) enum WorkerGoal {
     Gc {
         start_time: Instant,
@@ -38,7 +39,7 @@ pub(crate) enum WorkerGoal {
 /// thing with the highest priority.
 ///
 /// The fields of this struct are ordered by decreasing priority.
-#[derive(Default)] // All fields should be false by default.
+#[derive(Default, Debug)] // All fields should be false by default.
 pub(crate) struct WorkerRequests {
     /// The VM needs to fork. Workers should save their contexts and exit.
     pub(crate) stop_for_fork: bool,
diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs
index 8b39dd727a..893d17353f 100644
--- a/src/scheduler/worker_monitor.rs
+++ b/src/scheduler/worker_monitor.rs
@@ -152,6 +152,7 @@ impl WorkerMonitor {
 
         if all_parked {
             debug!("Worker {} is the last worker parked.", worker.ordinal);
+            dbg!(&sync.goals);
             let result = on_last_parked(&mut sync.goals);
             match result {
                 LastParkedResult::ParkSelf => {
@@ -238,4 +239,10 @@ impl WorkerMonitor {
         // If the current goal is `StopForFork`, return true so that the worker thread will exit.
         matches!(sync.goals.current, Some(WorkerGoal::StopForFork))
     }
+
+    /// Called when all workers have exited. 
+ pub fn on_all_workers_exited(&self) { + let mut sync = self.sync.try_lock().unwrap(); + sync.goals.current = None; + } } From 0d79d8d362d48daf7ea74c9f562d7b5cc47ca7e3 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 19 Jan 2024 17:08:27 +0800 Subject: [PATCH 18/48] Adjust logging --- src/scheduler/scheduler.rs | 10 +++------- src/scheduler/worker.rs | 11 ++++++++--- src/scheduler/worker_monitor.rs | 3 +-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index ab08d6c390..7a30007273 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -92,6 +92,7 @@ impl GCWorkScheduler { /// Ask all GC workers to exit for forking, and wait until all workers exited. pub fn stop_gc_threads_for_forking(self: &Arc) { + debug!("A mutator is requesting GC threads to stop for forking..."); self.worker_monitor.make_request(|requests| { if !requests.stop_for_fork { requests.stop_for_fork = true; @@ -117,8 +118,6 @@ impl GCWorkScheduler { self.worker_monitor.make_request(|requests| { if !requests.gc { requests.gc = true; - warn!("GC requested."); - dbg!(requests); true } else { false @@ -456,7 +455,7 @@ impl GCWorkScheduler { assert!(goals.current.is_none()); if goals.requests.gc { - // A mutator requested a GC to be scheduled. + trace!("A mutator requested a GC to be scheduled."); goals.requests.gc = false; // We set the eBPF trace point here so that bpftrace scripts can start recording work @@ -467,15 +466,12 @@ impl GCWorkScheduler { start_time: Instant::now(), }); - warn!("Responded to GC request."); - dbg!(goals); - self.add_schedule_collection_packet(); return LastParkedResult::WakeSelf; } if goals.requests.stop_for_fork { - // A mutator wanted to fork. + trace!("A mutator wanted to fork."); goals.requests.stop_for_fork = false; goals.current = Some(WorkerGoal::StopForFork); diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 2e65407b56..eb7b78b6a9 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -197,7 +197,7 @@ impl GCWorker { /// Each worker will keep polling and executing work packets in a loop. pub fn run(&mut self, tls: VMWorkerThread, mmtk: &'static MMTK) { probe!(mmtk, gcworker_run); - warn!("Worker {} started.", self.ordinal); + trace!("Worker {} started.", self.ordinal); WORKER_ORDINAL.with(|x| x.store(self.ordinal, Ordering::SeqCst)); self.scheduler.resolve_affinity(self.ordinal); self.tls = tls; @@ -229,6 +229,7 @@ impl GCWorker { probe!(mmtk, work, typename.as_ptr(), typename.len()); work.do_work_with_stat(self, mmtk); } + trace!("Worker {} exiting...", self.ordinal); probe!(mmtk, gcworker_exit); } } @@ -333,6 +334,8 @@ impl WorkerGroup { for worker in suspended_workers.drain(..) { VM::VMCollection::spawn_gc_thread(tls, GCThreadContext::::Worker(worker)); } + + debug!("Spawned {} worker threads.", self.worker_count()); } /// Surrender the `GCWorker` struct when a GC worker exits. @@ -343,14 +346,14 @@ impl WorkerGroup { }; let ordinal = worker.ordinal; suspended_workers.push(worker); - info!( + trace!( "Worker {} surrendered. 
({}/{})",
             ordinal,
             suspended_workers.len(),
             self.worker_count()
         );
         if suspended_workers.len() == self.worker_count() {
-            info!("All {} workers surrendered.", self.worker_count());
+            debug!("All {} workers surrendered.", self.worker_count());
             self.cond_all_workers_exited.notify_all();
         }
     }
@@ -364,6 +367,8 @@ impl WorkerGroup {
             };
             suspended_workers.len() != self.worker_count()
         });
+
+        debug!("All workers have exited.");
     }
 
     /// Get the number of workers in the group
diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs
index 893d17353f..cba34c6928 100644
--- a/src/scheduler/worker_monitor.rs
+++ b/src/scheduler/worker_monitor.rs
@@ -152,8 +152,7 @@ impl WorkerMonitor {
 
         if all_parked {
-            debug!("Worker {} is the last worker parked.", worker.ordinal);
-            dbg!(&sync.goals);
+            trace!("Worker {} is the last worker parked.", worker.ordinal);
             let result = on_last_parked(&mut sync.goals);
             match result {
                 LastParkedResult::ParkSelf => {

From 44f22ba37d654cecae29f6dd7a6468465d8c5fad Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Fri, 19 Jan 2024 17:27:42 +0800
Subject: [PATCH 19/48] Update eBPF tracing document

---
 tools/tracing/README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/tracing/README.md b/tools/tracing/README.md
index 9082f47942..96ff3e6a6d 100644
--- a/tools/tracing/README.md
+++ b/tools/tracing/README.md
@@ -16,10 +16,13 @@ shipped with the MMTk release you use.
 
 Currently, the core provides the following tracepoints.
 
-- `mmtk:collection_initialized()`: GC is enabled
+- `mmtk:collection_initialized()`: All GC worker threads are spawned
+- `mmtk:collection_uninitialized()`: All GC worker threads have exited. This may happen during
+  execution, for example, if the VM needs to fork and requests GC workers to exit.
 - `mmtk:harness_begin()`: the timing iteration of a benchmark begins
 - `mmtk:harness_end()`: the timing iteration of a benchmark ends
 - `mmtk:gcworker_run()`: a GC worker thread enters its work loop
+- `mmtk:gcworker_exit()`: a GC worker thread exits its work loop
 - `mmtk:gc_start()`: a collection epoch starts
 - `mmtk:gc_end()`: a collection epoch ends
 - `mmtk:process_edges(num_edges: int, is_roots: bool)`: a invocation of the `process_edges`

From 54f586b92e6e4410ebc5db2040fc0f1e9e0ab556 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Wed, 31 Jan 2024 17:13:59 +0800
Subject: [PATCH 20/48] Revert the change on `GcStatus`.

The change is unnecessary because GC workers now use the active goal to
determine what to do when the last worker parked.
---
 src/global_state.rs | 28 ++++------------------------
 src/mmtk.rs         | 14 +++++++-------
 2 files changed, 11 insertions(+), 31 deletions(-)

diff --git a/src/global_state.rs b/src/global_state.rs
index 1c7b7ee627..e066811557 100644
--- a/src/global_state.rs
+++ b/src/global_state.rs
@@ -1,7 +1,5 @@
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
-
-use atomic::Atomic;
-use bytemuck::NoUninit;
+use std::sync::Mutex;
 
 /// This stores some global states for an MMTK instance.
 /// Some MMTK components like plans and allocators may keep an reference to the struct, and can access it.
@@ -19,7 +17,7 @@ pub struct GlobalState {
     /// bindings to temporarily disable GC, at which point, we do not trigger GC even if the heap is full.
     pub(crate) trigger_gc_when_heap_is_full: AtomicBool,
     /// The current GC status.
-    pub(crate) gc_status: Atomic,
+    pub(crate) gc_status: Mutex,
     /// Is the current GC an emergency collection? 
Emergency means we may run out of memory soon, and we should /// attempt to collect as much as we can. pub(crate) emergency_collection: AtomicBool, @@ -205,7 +203,7 @@ impl Default for GlobalState { Self { initialized: AtomicBool::new(false), trigger_gc_when_heap_is_full: AtomicBool::new(true), - gc_status: Atomic::new(GcStatus::NotInGC), + gc_status: Mutex::new(GcStatus::NotInGC), stacks_prepared: AtomicBool::new(false), emergency_collection: AtomicBool::new(false), user_triggered_collection: AtomicBool::new(false), @@ -224,27 +222,9 @@ impl Default for GlobalState { } } -/// GC status. -/// -/// - It starts in the `NotInGC` state. -/// - It enters `GcPrepare` when GC starts (i.e. when `ScheduleCollection` is executed). -/// - It enters `GcProper` when all mutators have stopped. -/// - It returns to `NotInGC` when GC finishes. -/// -/// All states except `NotInGC` are considered "in GC". -/// -/// The status is checked by GC workers. The last parked worker only tries to open new buckets -/// during GC. -/// -/// The distinction between `GcPrepare` and `GcProper` is inherited from JikesRVM. Several -/// assertions in JikesRVM involve those two states. -#[derive(PartialEq, Clone, Copy, NoUninit, Debug)] -#[repr(u8)] +#[derive(PartialEq)] pub enum GcStatus { - /// Not in GC NotInGC, - /// GC has started, but not all stacks have been scanned, yet. GcPrepare, - /// GC has started, and all stacks have been scanned. GcProper, } diff --git a/src/mmtk.rs b/src/mmtk.rs index 44ef399083..d6d4b27c02 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -255,15 +255,15 @@ impl MMTK { self.inside_sanity.load(Ordering::Relaxed) } - pub(crate) fn set_gc_status(&self, new_status: GcStatus) { - let old_status = self.state.gc_status.swap(new_status, Ordering::SeqCst); - debug_assert_ne!(old_status, new_status); - if old_status == GcStatus::NotInGC { + pub(crate) fn set_gc_status(&self, s: GcStatus) { + let mut gc_status = self.state.gc_status.lock().unwrap(); + if *gc_status == GcStatus::NotInGC { self.state.stacks_prepared.store(false, Ordering::SeqCst); // FIXME stats self.stats.start_gc(); } - if new_status == GcStatus::NotInGC { + *gc_status = s; + if *gc_status == GcStatus::NotInGC { // FIXME stats if self.stats.get_gathering_stats() { self.stats.end_gc(); @@ -273,12 +273,12 @@ impl MMTK { /// Return true if a collection is in progress. pub fn gc_in_progress(&self) -> bool { - self.state.gc_status.load(Ordering::SeqCst) != GcStatus::NotInGC + *self.state.gc_status.lock().unwrap() != GcStatus::NotInGC } /// Return true if a collection is in progress and past the preparatory stage. pub fn gc_in_progress_proper(&self) -> bool { - self.state.gc_status.load(Ordering::SeqCst) == GcStatus::GcProper + *self.state.gc_status.lock().unwrap() == GcStatus::GcProper } /// Return true if the current GC is an emergency GC. From 822f6f5825f778c135936d0b041a2bd541e4752e Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 1 Feb 2024 12:17:36 +0800 Subject: [PATCH 21/48] Fix clippy warning --- src/scheduler/worker.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index eb7b78b6a9..5f6838b408 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -240,8 +240,12 @@ enum WorkerCreationState { unspawned_local_work_queues: Vec>>>, }, Created { - /// `Worker` instances for worker threads that have not been created yet, or worker thread that - /// are suspended (e.g. for forking). 
+        /// `Worker` instances for worker threads that have not been created yet, or worker thread
+        /// that are suspended (e.g. for forking).
+        // Note: Clippy warns about `Vec>` because `Vec` is already in the heap.
+        // However, the purpose of this `Vec` is allowing GC worker threads to give their
+        // `Box>` instances back to this pool. Therefore, the `Box` is necessary.
+        #[allow(clippy::vec_box)]
         suspended_workers: Vec>>,
     },
 }

From 7cb0b2c12be341c084e03f2cd943fad8ac088f83 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Thu, 1 Feb 2024 12:17:54 +0800
Subject: [PATCH 22/48] Formatting

---
 src/memory_manager.rs      | 1 -
 src/scheduler/mod.rs       | 2 +-
 src/scheduler/scheduler.rs | 3 ++-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/memory_manager.rs b/src/memory_manager.rs
index 964d55270a..364ceff616 100644
--- a/src/memory_manager.rs
+++ b/src/memory_manager.rs
@@ -515,7 +515,6 @@ pub fn uninitialize_collection(mmtk: &'static MMTK) {
     probe!(mmtk, collection_uninitialized);
 }
 
-
 /// Allow MMTk to trigger garbage collection when heap is full. This should only be used in pair with disable_collection().
 /// See the comments on disable_collection(). If disable_collection() is not used, there is no need to call this function at all.
 /// Note this call is not thread safe, only one VM thread should call this.
diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs
index b4d79adeb8..6cc3b4da94 100644
--- a/src/scheduler/mod.rs
+++ b/src/scheduler/mod.rs
@@ -17,8 +17,8 @@ mod work_bucket;
 pub use work_bucket::WorkBucketStage;
 
 mod worker;
-mod worker_monitor;
 mod worker_goals;
+mod worker_monitor;
 pub(crate) use worker::current_worker_ordinal;
 pub use worker::GCWorker;
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 7a30007273..79972a9edb 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -395,7 +395,8 @@ impl GCWorkScheduler {
             return Ok(work);
         }
 
-        let should_exit = self.worker_monitor
+        let should_exit = self
+            .worker_monitor
             .park_and_wait(worker, |goals| self.on_last_parked(worker, goals));
 
         if should_exit {

From 176ad475b0ba05d6c1024b2c345ed203dcd02448 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Mon, 5 Feb 2024 17:28:02 +0800
Subject: [PATCH 23/48] Replace uninitialize_collection with prepare/after fork.

---
 src/memory_manager.rs      | 33 +++++----------
 src/mmtk.rs                | 50 +++++++++++++++++++++++++
 src/scheduler/scheduler.rs | 12 +++++-
 src/scheduler/worker.rs    | 75 +++++++++++++++++++++++++------------
 4 files changed, 115 insertions(+), 55 deletions(-)

diff --git a/src/memory_manager.rs b/src/memory_manager.rs
index 364ceff616..f30eae8b40 100644
--- a/src/memory_manager.rs
+++ b/src/memory_manager.rs
@@ -480,39 +480,16 @@ pub fn start_worker(
     mmtk.scheduler.worker_group.surrender_gc_worker(worker);
 }
 
-/// Initialize the scheduler and GC workers that are required for doing garbage collections.
+/// Initialize the GC worker threads that are required for doing garbage collections.
 /// This is a mandatory call for a VM during its boot process once its thread system
-/// is ready. This call will invoke Collection::spawn_gc_thread() to create GC threads.
-///
-/// This function can also be called after `uninitialize_collection` is called in order to re-spawn
-/// the GC thtreads.
+/// is ready. This call will invoke `Collection::spawn_gc_thread()` to create GC threads.
 ///
 /// Arguments:
 /// * `mmtk`: A reference to an MMTk instance.
-/// * `tls`: The thread that wants to enable the collection. 
This value will be passed back to the VM in -/// Collection::spawn_gc_thread() so that the VM knows the context. +/// * `tls`: The thread that wants to enable the collection. This value will be passed back to +/// the VM in `Collection::spawn_gc_thread()` so that the VM knows the context. pub fn initialize_collection(mmtk: &'static MMTK, tls: VMThread) { - assert!( - !mmtk.state.is_initialized(), - "MMTk collection has been initialized (was initialize_collection() already called before?)" - ); - mmtk.scheduler.spawn_gc_threads(mmtk, tls); - mmtk.state.initialized.store(true, Ordering::SeqCst); - probe!(mmtk, collection_initialized); -} - -/// Ask GC worker threads to exit. Currently, this function is for the sole purpose of forking. -/// -/// Arguments: -/// * `mmtk`: A reference to an MMTk instance. -pub fn uninitialize_collection(mmtk: &'static MMTK) { - assert!( - mmtk.state.is_initialized(), - "MMTk collection has not been initialized, yet (was initialize_collection() called before?)" - ); - mmtk.scheduler.stop_gc_threads_for_forking(); - mmtk.state.initialized.store(false, Ordering::SeqCst); - probe!(mmtk, collection_uninitialized); + mmtk.initialize_collection(tls); } /// Allow MMTk to trigger garbage collection when heap is full. This should only be used in pair with disable_collection(). diff --git a/src/mmtk.rs b/src/mmtk.rs index d6d4b27c02..7e665c212e 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -220,6 +220,56 @@ impl MMTK { } } + /// Initialize the GC worker threads that are required for doing garbage collections. + /// This is a mandatory call for a VM during its boot process once its thread system + /// is ready. This call will invoke `Collection::spawn_gc_thread()`` to create GC threads. + /// + /// # Arguments + /// + /// * `tls`: The thread that wants to enable the collection. This value will be passed back + /// to the VM in `Collection::spawn_gc_thread()` so that the VM knows the context. + pub fn initialize_collection(&'static self, tls: VMThread) { + assert!( + !self.state.is_initialized(), + "MMTk collection has been initialized (was initialize_collection() already called before?)" + ); + self.scheduler.spawn_gc_threads(self, tls); + self.state.initialized.store(true, Ordering::SeqCst); + probe!(mmtk, collection_initialized); + } + + /// Prepare an MMTk instance for calling the `fork()` system call. + /// + /// This function makes all MMTk threads (currently including GC worker threads) save their + /// contexts and stop. A subsequent call to `MMTK::after_fork()` will re-spawn the threads + /// using the saved contexts. + /// + /// This function returns when all MMTK threads stopped. + pub fn prepare_to_fork(&'static self) { + assert!( + self.state.is_initialized(), + "MMTk collection has not been initialized, yet (was initialize_collection() called before?)" + ); + self.scheduler.stop_gc_threads_for_forking(); + } + + /// Call this function after the VM called the `fork()` system call. + /// + /// This function will re-spawn MMTk threads from saved contexts. + /// + /// # Arguments + /// + /// * `tls`: The thread that wants to respawn MMTk threads after forking. This value will be + /// passed back to the VM in `Collection::spawn_gc_thread()` so that the VM knows the + /// context. 
+ pub fn after_fork(&'static self, tls: VMThread) { + assert!( + self.state.is_initialized(), + "MMTk collection has not been initialized, yet (was initialize_collection() called before?)" + ); + self.scheduler.respawn_gc_threads_after_forking(tls); + } + /// Generic hook to allow benchmarks to be harnessed. MMTk will trigger a GC /// to clear any residual garbage and start collecting statistics for the benchmark. /// This is usually called by the benchmark harness as its last step before the actual benchmark. diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 79972a9edb..960fca80e8 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -85,13 +85,16 @@ impl GCWorkScheduler { self.worker_group.as_ref().worker_count() } - /// Create GC threads, including all workers. + /// Create GC threads, including all workers, for the first time. pub fn spawn_gc_threads(self: &Arc, mmtk: &'static MMTK, tls: VMThread) { - self.worker_group.spawn(mmtk, tls) + self.worker_group.create_workers(mmtk); + self.worker_group.spawn(tls) } /// Ask all GC workers to exit for forking, and wait until all workers exited. pub fn stop_gc_threads_for_forking(self: &Arc) { + self.worker_group.prepare_surrender_buffer(); + debug!("A mutator is requesting GC threads to stop for forking..."); self.worker_monitor.make_request(|requests| { if !requests.stop_for_fork { @@ -107,6 +110,11 @@ impl GCWorkScheduler { self.worker_monitor.on_all_workers_exited(); } + /// Respawn GC threads after forking. + pub fn respawn_gc_threads_after_forking(self: &Arc, tls: VMThread) { + self.worker_group.spawn(tls) + } + /// Resolve the affinity of a thread. pub fn resolve_affinity(&self, thread: ThreadId) { self.affinity.resolve_affinity(thread); diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 5f6838b408..5d19d003ac 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -197,7 +197,12 @@ impl GCWorker { /// Each worker will keep polling and executing work packets in a loop. pub fn run(&mut self, tls: VMWorkerThread, mmtk: &'static MMTK) { probe!(mmtk, gcworker_run); - trace!("Worker {} started.", self.ordinal); + debug!( + "Worker started. PID: {}, tid: {}, ordinal: {}", + unsafe { libc::getpid() }, + unsafe { libc::gettid() }, + self.ordinal + ); WORKER_ORDINAL.with(|x| x.store(self.ordinal, Ordering::SeqCst)); self.scheduler.resolve_affinity(self.ordinal); self.tls = tls; @@ -229,17 +234,25 @@ impl GCWorker { probe!(mmtk, work, typename.as_ptr(), typename.len()); work.do_work_with_stat(self, mmtk); } - trace!("Worker {} exiting...", self.ordinal); + debug!( + "Worker exiting. PID: {}, tid: {}, ordinal: {}", + unsafe { libc::getpid() }, + unsafe { libc::gettid() }, + self.ordinal + ); probe!(mmtk, gcworker_exit); } } enum WorkerCreationState { + /// `GCWorker`` structs have not been created yet. NotCreated { /// The local work queues for to-be-created workers. unspawned_local_work_queues: Vec>>>, }, - Created { + /// `GCWorker`` structs are created, but worker threads are either not been spawn, yet, or in + /// the progress of stopping for forking. + Resting { /// `Worker` instances for worker threads that have not been created yet, or worker thread /// that are suspended (e.g. for forking). // Note: Clippy warns about `Vec>` because `Vec` is already in the heap. 
@@ -248,12 +261,9 @@ enum WorkerCreationState { #[allow(clippy::vec_box)] suspended_workers: Vec>>, }, -} - -impl WorkerCreationState { - pub fn is_created(&self) -> bool { - matches!(self, WorkerCreationState::Created { .. }) - } + /// All `GCWorker`` structs have been transferred to worker threads, and worker threads are + /// running. + Spawned, } /// A worker group to manage all the GC workers. @@ -297,8 +307,11 @@ impl WorkerGroup { } /// Create `GCWorker` instances, but do not create threads, yet. - fn create_worker_structs(&self, state: &mut WorkerCreationState, mmtk: &'static MMTK) { - let WorkerCreationState::NotCreated { unspawned_local_work_queues } = state else { + pub fn create_workers(&self, mmtk: &'static MMTK) { + debug!("Creating GCWorker instances..."); + let mut state = self.state.lock().unwrap(); + + let WorkerCreationState::NotCreated { ref mut unspawned_local_work_queues } = *state else { panic!("GCWorker structs have already been created"); }; @@ -317,20 +330,16 @@ impl WorkerGroup { suspended_workers.push(worker); } - *state = WorkerCreationState::Created { suspended_workers }; + *state = WorkerCreationState::Resting { suspended_workers }; + debug!("GCWorker instances created."); } /// Spawn all the worker threads - pub fn spawn(&self, mmtk: &'static MMTK, tls: VMThread) { - let mut guard = self.state.lock().unwrap(); - - // Create worker structs lazily. If we are spawning workers after fork, worker structs - // will have been created already, so we can skip the creation. - if !guard.is_created() { - self.create_worker_structs(&mut *guard, mmtk); - } + pub fn spawn(&self, tls: VMThread) { + debug!("Spawning GC workers. PID: {}", unsafe { libc::getpid() }); + let mut state = self.state.lock().unwrap(); - let WorkerCreationState::Created { ref mut suspended_workers } = *guard else { + let WorkerCreationState::Resting { ref mut suspended_workers } = *state else { panic!("GCWorker structs have not been created, yet."); }; @@ -339,13 +348,27 @@ impl WorkerGroup { VM::VMCollection::spawn_gc_thread(tls, GCThreadContext::::Worker(worker)); } - debug!("Spawned {} worker threads.", self.worker_count()); + *state = WorkerCreationState::Spawned; + + debug!( + "Spawned {} worker threads. PID: {}", + self.worker_count(), + unsafe { libc::getpid() } + ); + } + + /// Prepare the buffer for workers to surrender their `GCWorker` structs. + pub fn prepare_surrender_buffer(&self) { + let mut state = self.state.lock().unwrap(); + *state = WorkerCreationState::Resting { + suspended_workers: Vec::with_capacity(self.worker_count()), + } } /// Surrender the `GCWorker` struct when a GC worker exits. pub fn surrender_gc_worker(&self, worker: Box>) { let mut state = self.state.lock().unwrap(); - let WorkerCreationState::Created { ref mut suspended_workers } = *state else { + let WorkerCreationState::Resting { ref mut suspended_workers } = *state else { panic!("GCWorker structs have not been created, yet."); }; let ordinal = worker.ordinal; @@ -366,13 +389,15 @@ impl WorkerGroup { pub fn wait_until_worker_exited(&self) { let guard = self.state.lock().unwrap(); let _guard = self.cond_all_workers_exited.wait_while(guard, |state| { - let WorkerCreationState::Created { ref suspended_workers } = *state else { + let WorkerCreationState::Resting { ref suspended_workers } = *state else { panic!("GCWorker structs have not been created, yet."); }; suspended_workers.len() != self.worker_count() }); - debug!("All workers have exited."); + debug!("All workers have exited. 
PID: {}", unsafe { + libc::getpid() + }); } /// Get the number of workers in the group From d3ddf7d73b29d5459261fd4eeffc0d649a783f89 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 7 Feb 2024 16:14:19 +0800 Subject: [PATCH 24/48] Fix unused warning --- src/memory_manager.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 53b8d55f6b..86468bc370 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -25,7 +25,7 @@ use crate::util::{Address, ObjectReference}; use crate::vm::edge_shape::MemorySlice; use crate::vm::ReferenceGlue; use crate::vm::VMBinding; -use std::sync::atomic::Ordering; + /// Initialize an MMTk instance. A VM should call this method after creating an [`crate::MMTK`] /// instance but before using any of the methods provided in MMTk (except `process()` and `process_bulk()`). /// @@ -438,6 +438,7 @@ pub fn free_with_size(mmtk: &MMTK, addr: Address, old_size: u /// Get the current active malloc'd bytes. Here MMTk only accounts for bytes that are done through those 'counted malloc' functions. #[cfg(feature = "malloc_counted_size")] pub fn get_malloc_bytes(mmtk: &MMTK) -> usize { + use std::sync::atomic::Ordering; mmtk.state.malloc_bytes.load(Ordering::SeqCst) } @@ -536,6 +537,7 @@ pub fn free_bytes(mmtk: &MMTK) -> usize { /// to call this method is at the end of a GC (e.g. when the runtime is about to resume threads). #[cfg(feature = "count_live_bytes_in_gc")] pub fn live_bytes_in_last_gc(mmtk: &MMTK) -> usize { + use std::sync::atomic::Ordering; mmtk.state.live_bytes_in_last_gc.load(Ordering::SeqCst) } From 5c125fbf4f0199398a682026bacc64e868a9aba3 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 7 Feb 2024 19:43:55 +0800 Subject: [PATCH 25/48] Do not wait for all workers to give in their structs --- src/memory_manager.rs | 2 +- src/mmtk.rs | 25 +++++++++++++++++++++---- src/scheduler/scheduler.rs | 10 ++++++++-- src/scheduler/worker.rs | 31 +++++-------------------------- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/src/memory_manager.rs b/src/memory_manager.rs index 86468bc370..e3cf6b52be 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -477,7 +477,7 @@ pub fn start_worker( mut worker: Box>, ) { worker.run(tls, mmtk); - mmtk.scheduler.worker_group.surrender_gc_worker(worker); + mmtk.scheduler.surrender_gc_worker(worker); } /// Initialize the GC worker threads that are required for doing garbage collections. diff --git a/src/mmtk.rs b/src/mmtk.rs index d95e23ee88..7c96b9a1d3 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -240,11 +240,28 @@ impl MMTK { /// Prepare an MMTk instance for calling the `fork()` system call. /// - /// This function makes all MMTk threads (currently including GC worker threads) save their - /// contexts and stop. A subsequent call to `MMTK::after_fork()` will re-spawn the threads - /// using the saved contexts. + /// The `fork()` system call is available on Linux and some UNIX variants, and may be emulated + /// on other platforms by libraries such as Cygwin. If `fork()` is called when the process has + /// multiple threads, it will only duplicate the current thread into the child process, and the + /// child process can only call async-signal-safe functions, notably `exec()`. For VMs that + /// that use multi-process concurrency, it is imperative that when calling `fork()`, only one + /// thread may exist in the process. /// - /// This function returns when all MMTK threads stopped. 
+ /// This function helps VMs that use `fork()` for multi-process concurrency. It instructs all + /// GC threads to save their contexts and return from their entry-point functions. + /// + /// Currently, such threads only include GC workers, and the entry point is + /// [`crate::memory_manager::start_worker`]. + /// + /// A subsequent call to `MMTK::after_fork()` will re-spawn the threads using their saved + /// contexts. + /// + /// # Caution! + /// + /// This function sends an asynchronous message to GC threads and returns immediately, but it + /// is only safe for the VM to call `fork()` after the underlying **native threads** of the GC + /// threads have exited. After calling this function, the VM should wait for their underlying + /// native threads to exit in VM-specific manner before calling `fork()`. pub fn prepare_to_fork(&'static self) { assert!( self.state.is_initialized(), diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 960fca80e8..925fb11ed3 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -104,10 +104,16 @@ impl GCWorkScheduler { false } }); + } - self.worker_group.wait_until_worker_exited(); + /// Surrender the `GCWorker` struct of a GC worker when it exits. + pub fn surrender_gc_worker(&self, worker: Box>) { + let all_surrendered = self.worker_group.return_gc_worker_struct(worker); - self.worker_monitor.on_all_workers_exited(); + if all_surrendered { + debug!("All {} workers surrendered.", self.worker_group.worker_count()); + self.worker_monitor.on_all_workers_exited(); + } } /// Respawn GC threads after forking. diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 5d19d003ac..9de6e6c045 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -12,7 +12,7 @@ use crossbeam::queue::ArrayQueue; #[cfg(feature = "count_live_bytes_in_gc")] use std::sync::atomic::AtomicUsize; use std::sync::atomic::Ordering; -use std::sync::{Arc, Condvar, Mutex}; +use std::sync::{Arc, Mutex}; /// Represents the ID of a GC worker thread. pub type ThreadId = usize; @@ -272,9 +272,6 @@ pub(crate) struct WorkerGroup { pub workers_shared: Vec>>, /// The stateful part state: Mutex>, - /// The condition of "all workers exited", i.e. the number of suspended workers is equal to the - /// number of workers. - cond_all_workers_exited: Condvar, } /// We have to persuade Rust that `WorkerGroup` is safe to share because it thinks one worker can @@ -302,7 +299,6 @@ impl WorkerGroup { state: Mutex::new(WorkerCreationState::NotCreated { unspawned_local_work_queues, }), - cond_all_workers_exited: Default::default(), }) } @@ -365,8 +361,9 @@ impl WorkerGroup { } } - /// Surrender the `GCWorker` struct when a GC worker exits. - pub fn surrender_gc_worker(&self, worker: Box>) { + /// Return the `GCWorker` struct to the worker group. + /// This function returns `true` if all workers returned their `GCWorker` structs. + pub fn return_gc_worker_struct(&self, worker: Box>) -> bool { let mut state = self.state.lock().unwrap(); let WorkerCreationState::Resting { ref mut suspended_workers } = *state else { panic!("GCWorker structs have not been created, yet."); @@ -379,25 +376,7 @@ impl WorkerGroup { suspended_workers.len(), self.worker_count() ); - if suspended_workers.len() == self.worker_count() { - debug!("All {} workers surrendered.", self.worker_count()); - self.cond_all_workers_exited.notify_all(); - } - } - - /// Wait until all workers exited. 
- pub fn wait_until_worker_exited(&self) { - let guard = self.state.lock().unwrap(); - let _guard = self.cond_all_workers_exited.wait_while(guard, |state| { - let WorkerCreationState::Resting { ref suspended_workers } = *state else { - panic!("GCWorker structs have not been created, yet."); - }; - suspended_workers.len() != self.worker_count() - }); - - debug!("All workers have exited. PID: {}", unsafe { - libc::getpid() - }); + suspended_workers.len() == self.worker_count() } /// Get the number of workers in the group From 41ee28cdc3c1c2f3f10f8e95ff21d8af7ddf9aa3 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 8 Feb 2024 11:05:01 +0800 Subject: [PATCH 26/48] Fix comment --- src/memory_manager.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/memory_manager.rs b/src/memory_manager.rs index e3cf6b52be..7842acd2fa 100644 --- a/src/memory_manager.rs +++ b/src/memory_manager.rs @@ -464,7 +464,7 @@ pub fn gc_poll(mmtk: &MMTK, tls: VMMutatorThread) { /// Run the main loop of a GC worker. /// /// This method runs until the worker exits. Currently a worker may exit after -/// `uninitialize_collection` is called. +/// [`crate::mmtk::MMTK::prepare_to_fork`] is called. /// /// Arguments: /// * `tls`: The thread that will be used as the GC worker. From c4ab8b4c9324b1432f234e23b186bf426f538b2b Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 8 Feb 2024 11:34:27 +0800 Subject: [PATCH 27/48] Only print TID on linux. --- src/scheduler/worker.rs | 23 ++++++++++++----------- src/util/rust_util/mod.rs | 14 ++++++++++++++ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 9de6e6c045..730a525602 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -198,10 +198,9 @@ impl GCWorker { pub fn run(&mut self, tls: VMWorkerThread, mmtk: &'static MMTK) { probe!(mmtk, gcworker_run); debug!( - "Worker started. PID: {}, tid: {}, ordinal: {}", - unsafe { libc::getpid() }, - unsafe { libc::gettid() }, - self.ordinal + "Worker started. ordinal: {}, {}", + self.ordinal, + crate::util::rust_util::debug_process_thread_id(), ); WORKER_ORDINAL.with(|x| x.store(self.ordinal, Ordering::SeqCst)); self.scheduler.resolve_affinity(self.ordinal); @@ -235,10 +234,9 @@ impl GCWorker { work.do_work_with_stat(self, mmtk); } debug!( - "Worker exiting. PID: {}, tid: {}, ordinal: {}", - unsafe { libc::getpid() }, - unsafe { libc::gettid() }, - self.ordinal + "Worker exiting. ordinal: {}, {}", + self.ordinal, + crate::util::rust_util::debug_process_thread_id(), ); probe!(mmtk, gcworker_exit); } @@ -332,7 +330,10 @@ impl WorkerGroup { /// Spawn all the worker threads pub fn spawn(&self, tls: VMThread) { - debug!("Spawning GC workers. PID: {}", unsafe { libc::getpid() }); + debug!( + "Spawning GC workers. {}", + crate::util::rust_util::debug_process_thread_id(), + ); let mut state = self.state.lock().unwrap(); let WorkerCreationState::Resting { ref mut suspended_workers } = *state else { @@ -347,9 +348,9 @@ impl WorkerGroup { *state = WorkerCreationState::Spawned; debug!( - "Spawned {} worker threads. PID: {}", + "Spawned {} worker threads. 
{}", self.worker_count(), - unsafe { libc::getpid() } + crate::util::rust_util::debug_process_thread_id(), ); } diff --git a/src/util/rust_util/mod.rs b/src/util/rust_util/mod.rs index efd7224881..062df368f4 100644 --- a/src/util/rust_util/mod.rs +++ b/src/util/rust_util/mod.rs @@ -161,3 +161,17 @@ mod initialize_once_tests { assert_eq!(INITIALIZE_COUNT.load(Ordering::SeqCst), 1); } } + +/// Create a formatted string that makes the best effort idenfying the current process and thread. +pub fn debug_process_thread_id() -> String { + let pid = unsafe { libc::getpid() }; + if cfg!(target_os = "linux") { + // `gettid()` is Linux-specific. + let tid = unsafe { libc::gettid() }; + format!("PID: {}, TID: {}", pid, tid) + } else { + // TODO: When we support other platforms, use platform-specific methods to get thread + // identifiers. + format!("PID: {}", pid) + } +} From d9b451a3d8e9d157293cc90c5fcf5e421faf8465 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 8 Feb 2024 12:13:03 +0800 Subject: [PATCH 28/48] Fix comments --- src/mmtk.rs | 6 +++++- src/scheduler/scheduler.rs | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/mmtk.rs b/src/mmtk.rs index 7c96b9a1d3..56bbf7a9c9 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -222,7 +222,11 @@ impl MMTK { /// Initialize the GC worker threads that are required for doing garbage collections. /// This is a mandatory call for a VM during its boot process once its thread system - /// is ready. This call will invoke `Collection::spawn_gc_thread()`` to create GC threads. + /// is ready. This function should be called exactly once, and should be called before + /// allocating objects in the MMTk heap. + /// + /// Internally, this function will spawn the GC worker threads, and the `tls` argument is used + /// for this purpose. /// /// # Arguments /// diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 925fb11ed3..a9c1ffc1c1 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -111,7 +111,10 @@ impl GCWorkScheduler { let all_surrendered = self.worker_group.return_gc_worker_struct(worker); if all_surrendered { - debug!("All {} workers surrendered.", self.worker_group.worker_count()); + debug!( + "All {} workers surrendered.", + self.worker_group.worker_count() + ); self.worker_monitor.on_all_workers_exited(); } } From 7a2074ea7c9d29cfa7e686c0c0d2e832b1f66232 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 8 Feb 2024 15:10:12 +0800 Subject: [PATCH 29/48] Refactor WorkerRequest and fix comments Use WorkerRequest to abstract out common access patterns. --- src/mmtk.rs | 3 ++- src/plan/gc_requester.rs | 4 ++-- src/scheduler/scheduler.rs | 40 ++++++++++++-------------------- src/scheduler/worker_goals.rs | 43 ++++++++++++++++++++++++++++++++--- 4 files changed, 59 insertions(+), 31 deletions(-) diff --git a/src/mmtk.rs b/src/mmtk.rs index 56bbf7a9c9..a1ac528c90 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -258,7 +258,8 @@ impl MMTK { /// [`crate::memory_manager::start_worker`]. /// /// A subsequent call to `MMTK::after_fork()` will re-spawn the threads using their saved - /// contexts. + /// contexts. The VM must not allocate objects in the MMTk heap before calling + /// `MMTK::after_fork()`. /// /// # Caution! 
/// diff --git a/src/plan/gc_requester.rs b/src/plan/gc_requester.rs index 49ae9ce2c9..e3a8462f96 100644 --- a/src/plan/gc_requester.rs +++ b/src/plan/gc_requester.rs @@ -28,8 +28,8 @@ impl GCRequester { if !self.request_flag.swap(true, Ordering::Relaxed) { // `GCWorkScheduler::request_schedule_collection` needs to hold a mutex to communicate - // with GC workers, so only the first mutator that changed the `request_flag` to `true` - // shall do it. + // with GC workers, which is expensive for functions like `poll`. We use the atomic + // flag `request_flag` to elide the need to acquire the mutex in subsequent calls. self.scheduler.request_schedule_collection(); } } diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index a9c1ffc1c1..4dd1d53e7b 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -26,7 +26,7 @@ pub struct GCWorkScheduler { pub work_buckets: EnumMap>, /// Workers pub(crate) worker_group: Arc>, - /// Monitor for worker synchronization, including a mutex and conditional variables. + /// For synchronized communication between workers and with mutators. pub(crate) worker_monitor: Arc, /// How to assign the affinity of each GC thread. Specified by the user. affinity: AffinityKind, @@ -85,25 +85,22 @@ impl GCWorkScheduler { self.worker_group.as_ref().worker_count() } - /// Create GC threads, including all workers, for the first time. + /// Create GC threads for the first time. It will also create the `GCWorker` instances. + /// + /// Currently GC threads only include worker threads, and we currently have only one worker + /// group. We may add more worker groups in the future. pub fn spawn_gc_threads(self: &Arc, mmtk: &'static MMTK, tls: VMThread) { self.worker_group.create_workers(mmtk); self.worker_group.spawn(tls) } - /// Ask all GC workers to exit for forking, and wait until all workers exited. + /// Ask all GC workers to exit for forking. pub fn stop_gc_threads_for_forking(self: &Arc) { self.worker_group.prepare_surrender_buffer(); debug!("A mutator is requesting GC threads to stop for forking..."); - self.worker_monitor.make_request(|requests| { - if !requests.stop_for_fork { - requests.stop_for_fork = true; - true - } else { - false - } - }); + self.worker_monitor + .make_request(|requests| requests.stop_for_fork.set()); } /// Surrender the `GCWorker` struct of a GC worker when it exits. @@ -119,7 +116,8 @@ impl GCWorkScheduler { } } - /// Respawn GC threads after forking. + /// Respawn GC threads after forking. This will reuse the `GCWorker` instances of stopped + /// workers. pub fn respawn_gc_threads_after_forking(self: &Arc, tls: VMThread) { self.worker_group.spawn(tls) } @@ -132,14 +130,8 @@ impl GCWorkScheduler { /// Request a GC to be scheduled. Called by mutator via `GCRequester`. pub(crate) fn request_schedule_collection(&self) { debug!("A mutator is sending GC-scheduling request to workers..."); - self.worker_monitor.make_request(|requests| { - if !requests.gc { - requests.gc = true; - true - } else { - false - } - }); + self.worker_monitor + .make_request(|requests| requests.gc.set()); } /// Add the `ScheduleCollection` packet. Called by the last parked worker. @@ -437,7 +429,7 @@ impl GCWorkScheduler { // In stop-the-world GC, mutators cannot request for GC while GC is in progress. // When we support concurrent GC, we should remove this assertion. assert!( - !goals.requests.gc, + !goals.requests.gc.debug_is_set(), "GC request sent to WorkerMonitor while GC is still in progress." 
        );
 
@@ -472,9 +464,8 @@ impl GCWorkScheduler {
     fn respond_to_requests(&self, goals: &mut WorkerGoals) -> LastParkedResult {
         assert!(goals.current.is_none());
 
-        if goals.requests.gc {
+        if goals.requests.gc.poll() {
             trace!("A mutator requested a GC to be scheduled.");
-            goals.requests.gc = false;
 
             // We set the eBPF trace point here so that bpftrace scripts can start recording work
             // packet events before the `ScheduleCollection` work packet starts.
@@ -488,9 +479,8 @@ impl GCWorkScheduler {
             return LastParkedResult::WakeSelf;
         }
 
-        if goals.requests.stop_for_fork {
+        if goals.requests.stop_for_fork.poll() {
             trace!("A mutator wanted to fork.");
-            goals.requests.stop_for_fork = false;
 
             goals.current = Some(WorkerGoal::StopForFork);
             return LastParkedResult::WakeAll;
diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs
index 0dc991f9f0..73d7a3f17e 100644
--- a/src/scheduler/worker_goals.rs
+++ b/src/scheduler/worker_goals.rs
@@ -39,10 +39,47 @@ pub(crate) enum WorkerGoal {
 /// thing with the highest priority.
 ///
 /// The fields of this struct are ordered with decreasing priority.
-#[derive(Default, Debug)] // All fields should be false by default.
+#[derive(Default, Debug)]
 pub(crate) struct WorkerRequests {
     /// The VM needs to fork. Workers should save their contexts and exit.
-    pub(crate) stop_for_fork: bool,
+    pub(crate) stop_for_fork: WorkerRequest,
     /// GC is requested. Workers should schedule a GC.
-    pub(crate) gc: bool,
+    pub(crate) gc: WorkerRequest,
+}
+
+/// To record whether a specific goal has been requested.
+/// It is basically a wrapper of `bool`, but forces it to be accessed in a particular way.
+#[derive(Default, Debug)] // Default: False by default.
+pub(crate) struct WorkerRequest {
+    /// True if the goal has been requested.
+    requested: bool,
+}
+
+impl WorkerRequest {
+    /// Set the goal as requested. Return `true` if its requested state changed from `false` to
+    /// `true`.
+    pub fn set(&mut self) -> bool {
+        if !self.requested {
+            self.requested = true;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Get the requested state and clear it. Return `true` if the requested state was `true`.
+    pub fn poll(&mut self) -> bool {
+        if self.requested {
+            self.requested = false;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Test if the request is set. For debug only. The last parked worker should use `poll` to
+    /// get the state and clear it.
+    pub fn debug_is_set(&self) -> bool {
+        self.requested
+    }
+}
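Patch 29 above layers two flags: `GCRequester`'s relaxed atomic `request_flag` lets repeated triggers skip the mutex, while the mutex-protected `WorkerRequest::set`/`poll` pair is what the last parked worker actually consumes and clears. A minimal, self-contained sketch of that two-level handshake follows; `FastRequester` and `Coordinator` are illustrative names only, not mmtk-core types.

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::{Condvar, Mutex};

    /// Stand-in for the mutex-protected request flag consumed by the last parked worker.
    struct Coordinator {
        requested: Mutex<bool>,
        cond: Condvar,
    }

    /// Stand-in for `GCRequester`: an atomic fast path elides the mutex on repeated triggers.
    struct FastRequester {
        request_flag: AtomicBool,
    }

    impl FastRequester {
        fn request(&self, coord: &Coordinator) {
            // Only the caller that flips the flag from false to true takes the mutex;
            // concurrent and subsequent callers return immediately until a worker
            // clears `request_flag` when it takes the request.
            if !self.request_flag.swap(true, Ordering::Relaxed) {
                let mut requested = coord.requested.lock().unwrap();
                if !*requested {
                    *requested = true;
                    coord.cond.notify_all(); // wake parked workers
                }
            }
        }
    }

    fn main() {
        let coord = Coordinator { requested: Mutex::new(false), cond: Condvar::new() };
        let requester = FastRequester { request_flag: AtomicBool::new(false) };
        requester.request(&coord);
        assert!(*coord.requested.lock().unwrap());
    }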
From eb1d8dfd8fd1bb41e618884052cab536c27d378d Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Thu, 8 Feb 2024 20:44:21 +0800
Subject: [PATCH 30/48] Use EnumMap for requested goals

---
 src/global_state.rs             |  6 ++
 src/scheduler/scheduler.rs      | 77 ++++++++++++++-----------
 src/scheduler/worker_goals.rs   | 99 +++++++++++++++------------------
 src/scheduler/worker_monitor.rs | 25 +++------
 4 files changed, 103 insertions(+), 104 deletions(-)

diff --git a/src/global_state.rs b/src/global_state.rs
index 4823119851..f355e697f2 100644
--- a/src/global_state.rs
+++ b/src/global_state.rs
@@ -1,5 +1,8 @@
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::Mutex;
+use std::time::Instant;
+
+use atomic_refcell::AtomicRefCell;
 
 /// This stores some global states for an MMTK instance.
 /// Some MMTK components like plans and allocators may keep a reference to the struct, and can access it.
@@ -15,6 +18,8 @@ pub struct GlobalState {
     pub(crate) initialized: AtomicBool,
     /// The current GC status.
     pub(crate) gc_status: Mutex,
+    /// When did the last GC start? Only accessed by the last parked worker.
+    pub(crate) gc_start_time: AtomicRefCell>,
     /// Is the current GC an emergency collection? Emergency means we may run out of memory soon, and we should
     /// attempt to collect as much as we can.
     pub(crate) emergency_collection: AtomicBool,
@@ -195,6 +200,7 @@ impl Default for GlobalState {
         Self {
             initialized: AtomicBool::new(false),
             gc_status: Mutex::new(GcStatus::NotInGC),
+            gc_start_time: AtomicRefCell::new(None),
             stacks_prepared: AtomicBool::new(false),
             emergency_collection: AtomicBool::new(false),
             user_triggered_collection: AtomicBool::new(false),
diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 4dd1d53e7b..9ed448eed8 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -99,8 +99,7 @@ impl GCWorkScheduler {
         self.worker_group.prepare_surrender_buffer();
 
         debug!("A mutator is requesting GC threads to stop for forking...");
-        self.worker_monitor
-            .make_request(|requests| requests.stop_for_fork.set());
+        self.worker_monitor.make_request(WorkerGoal::StopForFork);
     }
 
     /// Surrender the `GCWorker` struct of a GC worker when it exits.
@@ -130,8 +129,7 @@ impl GCWorkScheduler {
     /// Request a GC to be scheduled. Called by mutator via `GCRequester`.
     pub(crate) fn request_schedule_collection(&self) {
         debug!("A mutator is sending GC-scheduling request to workers...");
-        self.worker_monitor
-            .make_request(|requests| requests.gc.set());
+        self.worker_monitor.make_request(WorkerGoal::Gc);
     }
 
     /// Add the `ScheduleCollection` packet. Called by the last parked worker.
@@ -417,19 +415,19 @@ impl GCWorkScheduler {
     /// Called when the last worker parked.
     /// `should_schedule_gc` is true if a mutator requested a GC.
     fn on_last_parked(&self, worker: &GCWorker, goals: &mut WorkerGoals) -> LastParkedResult {
-        let Some(ref current_goal) = goals.current else {
+        let Some(ref current_goal) = goals.current() else {
             // There is no goal. Find a request to respond to.
-            return self.respond_to_requests(goals);
+            return self.respond_to_requests(worker, goals);
         };
 
         match current_goal {
-            WorkerGoal::Gc { start_time } => {
+            WorkerGoal::Gc => {
                 // We are in the progress of GC.
 
                 // In stop-the-world GC, mutators cannot request for GC while GC is in progress.
                 // When we support concurrent GC, we should remove this assertion.
                 assert!(
-                    !goals.requests.gc.debug_is_set(),
+                    !goals.debug_is_requested(WorkerGoal::Gc),
                     "GC request sent to WorkerMonitor while GC is still in progress."
                 );
 
@@ -446,11 +444,11 @@ impl GCWorkScheduler {
                     LastParkedResult::WakeAll
                 } else {
                     // GC finished.
-                    self.on_gc_finished(worker, start_time);
+                    self.on_gc_finished(worker);
 
                     // Clear the current goal
-                    goals.current = None;
-                    self.respond_to_requests(goals)
+                    goals.on_current_goal_completed();
+                    self.respond_to_requests(worker, goals)
                 }
             }
             WorkerGoal::StopForFork => {
@@ -461,33 +459,40 @@ impl GCWorkScheduler {
         }
     }
 
     /// Respond to a worker request.
-    fn respond_to_requests(&self, goals: &mut WorkerGoals) -> LastParkedResult {
-        assert!(goals.current.is_none());
-
-        if goals.requests.gc.poll() {
-            trace!("A mutator requested a GC to be scheduled.");
-
-            // We set the eBPF trace point here so that bpftrace scripts can start recording
-            // work packet events before the `ScheduleCollection` work packet starts.
-            probe!(mmtk, gc_start);
-
-            goals.current = Some(WorkerGoal::Gc {
-                start_time: Instant::now(),
-            });
-
-            self.add_schedule_collection_packet();
-            return LastParkedResult::WakeSelf;
-        }
-
-        if goals.requests.stop_for_fork.poll() {
-            trace!("A mutator wanted to fork.");
-
-            goals.current = Some(WorkerGoal::StopForFork);
-            return LastParkedResult::WakeAll;
-        }
-
-        // No requests. Park this worker, too.
-        LastParkedResult::ParkSelf
+    fn respond_to_requests(
+        &self,
+        worker: &GCWorker,
+        goals: &mut WorkerGoals,
+    ) -> LastParkedResult {
+        assert!(goals.current().is_none());
+
+        let Some(goal) = goals.poll_next_goal() else {
+            // No requests. Park this worker, too. 
+            return LastParkedResult::ParkSelf;
+        };
+
+        match goal {
+            WorkerGoal::Gc => {
+                trace!("A mutator requested a GC to be scheduled.");
+
+                // We set the eBPF trace point here so that bpftrace scripts can start recording
+                // work packet events before the `ScheduleCollection` work packet starts.
+                probe!(mmtk, gc_start);
+
+                {
+                    let mut gc_start_time = worker.mmtk.state.gc_start_time.borrow_mut();
+                    assert!(gc_start_time.is_none(), "GC already started?");
+                    *gc_start_time = Some(Instant::now());
+                }
+
+                self.add_schedule_collection_packet();
+                LastParkedResult::WakeSelf
+            }
+            WorkerGoal::StopForFork => {
+                trace!("A mutator wanted to fork.");
+                LastParkedResult::WakeAll
+            }
+        }
     }
 
     /// Find more work for workers to do. Return true if more work is available.
@@ -515,7 +520,7 @@ impl GCWorkScheduler {
 
     /// Called when GC has finished, i.e. when all work packets have been executed.
     /// Return `true` if it scheduled the next GC immediately.
-    fn on_gc_finished(&self, worker: &GCWorker, start_time: &Instant) {
+    fn on_gc_finished(&self, worker: &GCWorker) {
         // All GC workers must have parked by now.
         debug_assert!(!self.worker_group.has_designated_work());
         debug_assert!(self.all_buckets_empty());
@@ -530,6 +535,10 @@ impl GCWorkScheduler {
         mmtk.gc_trigger.policy.on_gc_end(mmtk);
 
         // Compute the elapsed time of the GC.
+        let start_time = {
+            let mut gc_start_time = worker.mmtk.state.gc_start_time.borrow_mut();
+            gc_start_time.take().expect("GC not started yet?")
+        };
         let elapsed = start_time.elapsed();
 
         info!(
diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs
index 73d7a3f17e..3bf367d8cf 100644
--- a/src/scheduler/worker_goals.rs
+++ b/src/scheduler/worker_goals.rs
@@ -7,79 +7,72 @@
 //!
 //! - When in the progress of GC, the last parker will try to open buckets or announce the GC
 //!   has finished.
-//! - When stopping for fork, every worker should exit when waken.
+//! - When stopping for fork, every awakened worker should save its thread state (in the
+//!   `GCWorker` struct) and exit.
 //!
-//! The struct `WorkerRequests` keeps a list of requests from mutators, such as requests for GC
-//! and requests for forking. But the GC workers will only respond to one request at a time.
+//! The struct `WorkerGoals` keeps the set of goals requested by mutators, but GC workers will only
+//! respond to one request at a time, and will favor higher-priority goals.
 
-use std::time::Instant;
+use enum_map::{Enum, EnumMap};
 
 /// The current and requested goals.
 #[derive(Default, Debug)]
 pub(crate) struct WorkerGoals {
-    /// What are the workers doing now?
-    pub(crate) current: Option,
-    /// Requests received from mutators.
-    pub(crate) requests: WorkerRequests,
+    /// The current goal.
+    current: Option,
+    /// Requests received from mutators. `requests[goal]` is true if the `goal` is requested.
+    requests: EnumMap,
 }
 
-/// The thing workers are currently doing. This affects several things, such as what the last
-/// parked worker will do, and whether workers will stop themselves.
-#[derive(Debug)]
+/// A goal, i.e. something that workers should work together to achieve. 
+///
+/// Members of this `enum` should be listed from the highest priority to the lowest priority.
+#[derive(Debug, Enum, Clone, Copy)]
 pub(crate) enum WorkerGoal {
-    Gc {
-        start_time: Instant,
-    },
-    #[allow(unused)] // TODO: Implement forking support later.
+    /// Do a garbage collection.
+    Gc,
+    /// Stop all GC threads so that the VM can call `fork()`.
     StopForFork,
 }
 
-/// Requests received from mutators. Workers respond to those requests when they do not have a
-/// current goal. Multiple things can be requested at the same time, and workers respond to the
-/// thing with the highest priority.
-///
-/// The fields of this struct are ordered with decreasing priority.
-#[derive(Default, Debug)]
-pub(crate) struct WorkerRequests {
-    /// The VM needs to fork. Workers should save their contexts and exit.
-    pub(crate) stop_for_fork: WorkerRequest,
-    /// GC is requested. Workers should schedule a GC.
-    pub(crate) gc: WorkerRequest,
-}
-
-/// To record whether a specific goal has been requested.
-/// It is basically a wrapper of `bool`, but forces it to be accessed in a particular way.
-#[derive(Default, Debug)] // Default: False by default.
-pub(crate) struct WorkerRequest {
-    /// True if the goal has been requested.
-    requested: bool,
-}
-
-impl WorkerRequest {
-    /// Set the goal as requested. Return `true` if its requested state changed from `false` to
-    /// `true`.
-    pub fn set(&mut self) -> bool {
-        if !self.requested {
-            self.requested = true;
+impl WorkerGoals {
+    /// Set the `goal` as requested. Return `true` if the requested state of the `goal` changed
+    /// from `false` to `true`.
+    pub fn set_request(&mut self, goal: WorkerGoal) -> bool {
+        if !self.requests[goal] {
+            self.requests[goal] = true;
             true
         } else {
             false
         }
     }
 
-    /// Get the requested state and clear it. Return `true` if the requested state was `true`.
-    pub fn poll(&mut self) -> bool {
-        if self.requested {
-            self.requested = false;
-            true
-        } else {
-            false
+    /// Move the highest priority goal from the pending requests to the current request. Return
+    /// that goal, or `None` if no goal has been requested.
+    pub fn poll_next_goal(&mut self) -> Option {
+        for (goal, requested) in self.requests.iter_mut() {
+            if *requested {
+                *requested = false;
+                self.current = Some(goal);
+                return Some(goal);
+            }
         }
+        None
+    }
+
+    /// Get the current goal if exists.
+    pub fn current(&self) -> Option {
+        self.current
+    }
+
+    /// Called when the current goal is completed. This will clear the current goal.
+    pub fn on_current_goal_completed(&mut self) {
+        self.current = None
     }
 
-    /// Test if the request is set. For debug only. The last parked worker should use `poll` to
-    /// get the state and clear it.
-    pub fn debug_is_set(&self) -> bool {
-        self.requested
+    /// Test if the given `goal` is requested. Used for debug purpose, only. The workers always
+    /// respond to the request of the highest priority first.
+    pub fn debug_is_requested(&self, goal: WorkerGoal) -> bool {
+        self.requests[goal]
     }
 }
diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs
index cba34c6928..70aabce4e4 100644
--- a/src/scheduler/worker_monitor.rs
+++ b/src/scheduler/worker_monitor.rs
@@ -9,7 +9,7 @@ use std::sync::{Condvar, Mutex};
 use crate::vm::VMBinding;
 
 use super::{
-    worker_goals::{WorkerGoal, WorkerGoals, WorkerRequests},
+    worker_goals::{WorkerGoal, WorkerGoals},
     GCWorker,
 };
 
@@ -97,21 +97,12 @@ impl WorkerMonitor {
         }
     }
 
-    /// Make a request. 
The `callback` will be called while holding the mutex `self.sync` so that
-    /// the caller can set some of its fields to true.
-    ///
-    /// The `callback` has one parameter:
-    ///
-    /// - a mutable reference to the `WorkerRequests` instance.
-    ///
-    /// The `callback` should return `true` if it set any field of `WorkerRequests` to true.
-    pub fn make_request(&self, callback: Callback)
-    where
-        Callback: FnOnce(&mut WorkerRequests) -> bool,
-    {
+    /// Make a request. Can be called by a mutator to request the workers to work towards the
+    /// given `goal`.
+    pub fn make_request(&self, goal: WorkerGoal) {
         let mut guard = self.sync.lock().unwrap();
-        let set_any_fields = callback(&mut guard.goals.requests);
-        if set_any_fields {
+        let newly_requested = guard.goals.set_request(goal);
+        if newly_requested {
             self.notify_work_available(false);
         }
     }
@@ -236,12 +227,12 @@ impl WorkerMonitor {
         );
 
         // If the current goal is `StopForFork`, return true so that the worker thread will exit.
-        matches!(sync.goals.current, Some(WorkerGoal::StopForFork))
+        matches!(sync.goals.current(), Some(WorkerGoal::StopForFork))
     }
 
     /// Called when all workers have exited.
     pub fn on_all_workers_exited(&self) {
         let mut sync = self.sync.try_lock().unwrap();
-        sync.goals.current = None;
+        sync.goals.on_current_goal_completed();
     }
 }
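Patch 30 above makes `EnumMap` double as a fixed priority queue: `enum-map` iterates keys in declaration order, so listing `WorkerGoal` variants from highest to lowest priority makes `poll_next_goal` hand back the most urgent pending request first. The following minimal sketch shows just that mechanism, assuming the `enum-map` crate; `Goal` and `Goals` are illustrative names, not the mmtk-core types.

    use enum_map::{Enum, EnumMap};

    // Declaration order is the priority order: `Fork` outranks `Gc`.
    #[derive(Debug, Enum, Clone, Copy, PartialEq)]
    enum Goal {
        Fork,
        Gc,
    }

    #[derive(Default)]
    struct Goals {
        requests: EnumMap<Goal, bool>,
    }

    impl Goals {
        /// Take the highest-priority pending request, clearing its flag.
        fn poll_next(&mut self) -> Option<Goal> {
            for (goal, requested) in self.requests.iter_mut() {
                if *requested {
                    *requested = false;
                    return Some(goal);
                }
            }
            None
        }
    }

    fn main() {
        let mut goals = Goals::default();
        goals.requests[Goal::Gc] = true;
        goals.requests[Goal::Fork] = true;
        // `Fork` wins although `Gc` was requested first.
        assert_eq!(goals.poll_next(), Some(Goal::Fork));
        assert_eq!(goals.poll_next(), Some(Goal::Gc));
        assert_eq!(goals.poll_next(), None);
    }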
From 54d798a06d4dfa9381ae1867ebee7fdd476a6b22 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Thu, 8 Feb 2024 21:37:23 +0800
Subject: [PATCH 31/48] Fix style for stable Rust

---
 src/util/rust_util/mod.rs | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/src/util/rust_util/mod.rs b/src/util/rust_util/mod.rs
index 062df368f4..23da0c00f3 100644
--- a/src/util/rust_util/mod.rs
+++ b/src/util/rust_util/mod.rs
@@ -125,6 +125,20 @@ where
     unsafe { result_array.assume_init() }
 }
 
+/// Create a formatted string that makes the best effort identifying the current process and thread.
+pub fn debug_process_thread_id() -> String {
+    let pid = unsafe { libc::getpid() };
+    if cfg!(target_os = "linux") {
+        // `gettid()` is Linux-specific.
+        let tid = unsafe { libc::gettid() };
+        format!("PID: {}, TID: {}", pid, tid)
+    } else {
+        // TODO: When we support other platforms, use platform-specific methods to get thread
+        // identifiers.
+        format!("PID: {}", pid)
+    }
+}
+
 #[cfg(test)]
 mod initialize_once_tests {
     use super::*;
@@ -161,17 +175,3 @@ mod initialize_once_tests {
         assert_eq!(INITIALIZE_COUNT.load(Ordering::SeqCst), 1);
     }
 }
-
-/// Create a formatted string that makes the best effort idenfying the current process and thread.
-pub fn debug_process_thread_id() -> String {
-    let pid = unsafe { libc::getpid() };
-    if cfg!(target_os = "linux") {
-        // `gettid()` is Linux-specific.
-        let tid = unsafe { libc::gettid() };
-        format!("PID: {}, TID: {}", pid, tid)
-    } else {
-        // TODO: When we support other platforms, use platform-specific methods to get thread
-        // identifiers.
-        format!("PID: {}", pid)
-    }
-}

From f0e2755f7fba0b352a0cfdf42ef0e25eb5f0c818 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Thu, 8 Feb 2024 21:39:07 +0800
Subject: [PATCH 32/48] Fix MacOS build

---
 src/util/rust_util/mod.rs | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/util/rust_util/mod.rs b/src/util/rust_util/mod.rs
index 23da0c00f3..f8a4b3c7c2 100644
--- a/src/util/rust_util/mod.rs
+++ b/src/util/rust_util/mod.rs
@@ -128,11 +128,14 @@ where
 /// Create a formatted string that makes the best effort identifying the current process and thread.
 pub fn debug_process_thread_id() -> String {
     let pid = unsafe { libc::getpid() };
-    if cfg!(target_os = "linux") {
+    #[cfg(target_os = "linux")]
+    {
         // `gettid()` is Linux-specific.
         let tid = unsafe { libc::gettid() };
         format!("PID: {}, TID: {}", pid, tid)
-    } else {
+    }
+    #[cfg(not(target_os = "linux"))]
+    {
         // TODO: When we support other platforms, use platform-specific methods to get thread
         // identifiers.
         format!("PID: {}", pid)
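The MacOS breakage fixed by patch 32 above is a general `cfg!` pitfall: `cfg!(...)` expands to a boolean constant, so both arms of the `if` must still type-check on every target, and the Linux-only `libc::gettid()` call broke non-Linux builds even though the branch was dead. `#[cfg(...)]` removes the unselected block from compilation entirely. A reduced sketch with a hypothetical `platform_tag` function (not from the MMTk sources):

    // With `if cfg!(target_os = "linux")`, a Linux-only call inside the branch is
    // still type-checked on every platform and can break the build. With `#[cfg]`,
    // the unselected block below does not exist on other targets at all.
    fn platform_tag() -> &'static str {
        #[cfg(target_os = "linux")]
        {
            "linux"
        }
        #[cfg(not(target_os = "linux"))]
        {
            "other"
        }
    }

    fn main() {
        println!("running on: {}", platform_tag());
    }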
From ea53ce10ba19aba6a7727efbc1f51c59350eef99 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Tue, 19 Mar 2024 12:10:46 +0800
Subject: [PATCH 33/48] Minor fix.

If a worker parked again when it is asked to exit, it should panic. It
is not "unimplemented".
---
 src/scheduler/scheduler.rs | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 9ed448eed8..4357685116 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -452,8 +452,10 @@ impl GCWorkScheduler {
                 }
             }
             WorkerGoal::StopForFork => {
-                // A worker parked again when it is asked to exit.
-                unimplemented!()
+                panic!(
+                    "Worker {} parked again when it is asked to exit.",
+                    worker.ordinal
+                )
             }
         }
     }

From 2c15ecdb7a0eaff3eb51cc11182e63620764cfc6 Mon Sep 17 00:00:00 2001
From: Kunshan Wang 
Date: Tue, 19 Mar 2024 14:06:32 +0800
Subject: [PATCH 34/48] Update comment

---
 src/mmtk.rs | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/src/mmtk.rs b/src/mmtk.rs
index a827313543..24339543b4 100644
--- a/src/mmtk.rs
+++ b/src/mmtk.rs
@@ -245,21 +245,33 @@ impl MMTK {
     /// Prepare an MMTk instance for calling the `fork()` system call.
     ///
     /// The `fork()` system call is available on Linux and some UNIX variants, and may be emulated
-    /// on other platforms by libraries such as Cygwin. If `fork()` is called when the process has
-    /// multiple threads, it will only duplicate the current thread into the child process, and the
-    /// child process can only call async-signal-safe functions, notably `exec()`. For VMs that
-    /// that use multi-process concurrency, it is imperative that when calling `fork()`, only one
-    /// thread may exist in the process.
+    /// on other platforms by libraries such as Cygwin. The properties of the `fork()` system call
+    /// require the users to do some preparation before calling it.
+    ///
+    /// - **Multi-threading**: If `fork()` is called when the process has multiple threads, it
+    ///   will only duplicate the current thread into the child process, and the child process can
+    ///   only call async-signal-safe functions, notably `exec()`. For VMs that use
+    ///   multi-process concurrency, it is imperative that when calling `fork()`, only one thread
+    ///   may exist in the process.
+    ///
+    /// - **File descriptors**: The child process inherits copies of the parent's set of open
+    ///   file descriptors. This may or may not be desired depending on use cases.
     ///
     /// This function helps VMs that use `fork()` for multi-process concurrency. It instructs all
-    /// GC threads to save their contexts and return from their entry-point functions. Currently,
-    /// such threads only include GC workers, and the entry point is
-    /// [`crate::memory_manager::start_worker`].
-    ///
-    /// A subsequent call to `MMTK::after_fork()` will re-spawn the threads using their saved
-    /// contexts. The VM must not allocate objects in the MMTk heap before calling
-    /// `MMTK::after_fork()`.
+    /// GC threads to save their contexts and return from their entry-point functions. Currently,
+    /// such threads only include GC workers, and the entry point is
+    /// [`crate::memory_manager::start_worker`]. A subsequent call to `MMTK::after_fork()` will
+    /// re-spawn the threads using their saved contexts. The VM must not allocate objects in the
+    /// MMTk heap before calling `MMTK::after_fork()`.
+    ///
+    /// TODO: Currently, the MMTk core does not keep any files open for a long time. In the
+    /// future, this function and the `after_fork` function may be used for handling open file
+    /// descriptors across invocations of `fork()`. One possible use case is logging GC activities
+    /// and statistics to files, such as performing heap dumps across multiple GCs.
+    ///
+    /// If a VM intends to execute another program by calling `fork()` and immediately calling
+    /// `exec`, it may skip this function because the state of the MMTk instance will be irrelevant
+    /// in that case.
     ///
     /// # Caution!
     ///
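Patch 34's comment above implies a concrete calling sequence for a binding that forks: ask MMTk to stop its GC threads, wait for the underlying native threads to exit, call `fork()`, then re-spawn workers in whichever process keeps the heap alive. Below is a hedged sketch of the parent side; `vm_join_gc_threads` stands in for the VM's own thread bookkeeping (it is not an MMTk API), and the `mmtk` calls are left as comments because the concrete `MMTK` instance and `tls` argument are binding-specific.

    use libc::{fork, waitpid};

    /// Stand-in for VM-specific bookkeeping of GC thread handles.
    fn vm_join_gc_threads() {
        // Join the native handles saved when the VM spawned the GC threads.
    }

    fn fork_with_mmtk() {
        // mmtk.prepare_to_fork();   // asynchronous: workers save state and exit
        vm_join_gc_threads();        // fork() is only safe once the native threads are gone

        let pid = unsafe { fork() };
        if pid == 0 {
            // Child: typically exec(); if it keeps the MMTk heap, it must call
            // mmtk.after_fork(tls) before touching the heap again.
            std::process::exit(0);
        } else {
            // Parent: re-spawn GC workers from their saved contexts.
            // mmtk.after_fork(tls);
            let mut status = 0;
            unsafe { waitpid(pid, &mut status, 0) };
        }
    }

    fn main() {
        fork_with_mmtk();
    }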
+    /// `GCWorker` structs have been transferred to worker threads.
     Spawned,
 }
 
@@ -272,9 +274,9 @@ pub(crate) struct WorkerGroup<VM: VMBinding> {
     state: Mutex<WorkerCreationState<VM>>,
 }
 
-/// We have to persuade Rust that `WorkerGroup` is safe to share because it thinks one worker can
-/// refer to another worker via the path "worker -> scheduler -> worker_group -> suspended_workers
-/// -> worker" which it thinks is cyclic reference and unsafe.
+/// We have to persuade Rust that `WorkerGroup` is safe to share because the compiler thinks one
+/// worker can refer to another worker via the path "worker -> scheduler -> worker_group ->
+/// `Resting::workers` -> worker", which is a cyclic reference and therefore unsafe.
 unsafe impl<VM: VMBinding> Sync for WorkerGroup<VM> {}
 
 impl<VM: VMBinding> WorkerGroup<VM> {
@@ -295,7 +297,7 @@ impl<VM: VMBinding> WorkerGroup<VM> {
         Arc::new(Self {
             workers_shared,
             state: Mutex::new(WorkerCreationState::NotCreated {
-                unspawned_local_work_queues,
+                local_work_queues: unspawned_local_work_queues,
             }),
         })
     }
@@ -305,12 +307,12 @@ impl<VM: VMBinding> WorkerGroup<VM> {
     /// Create `GCWorker` instances, but do not create threads, yet.
     pub fn create_workers(&self, mmtk: &'static MMTK<VM>) {
         debug!("Creating GCWorker instances...");
         let mut state = self.state.lock().unwrap();
 
-        let WorkerCreationState::NotCreated { ref mut unspawned_local_work_queues } = *state else {
+        let WorkerCreationState::NotCreated { ref mut local_work_queues } = *state else {
             panic!("GCWorker structs have already been created");
         };
 
-        assert_eq!(self.workers_shared.len(), unspawned_local_work_queues.len());
-        let mut suspended_workers = Vec::with_capacity(self.workers_shared.len());
+        assert_eq!(self.workers_shared.len(), local_work_queues.len());
+        let mut workers = Vec::with_capacity(self.workers_shared.len());
 
         // Spawn each worker thread.
         for (ordinal, shared) in self.workers_shared.iter().enumerate() {
@@ -319,12 +321,12 @@ impl<VM: VMBinding> WorkerGroup<VM> {
                 ordinal,
                 mmtk.scheduler.clone(),
                 shared.clone(),
-                unspawned_local_work_queues.pop().unwrap(),
+                local_work_queues.pop().unwrap(),
             ));
-            suspended_workers.push(worker);
+            workers.push(worker);
         }
 
-        *state = WorkerCreationState::Resting { suspended_workers };
+        *state = WorkerCreationState::Resting { workers };
         debug!("GCWorker instances created.");
     }
@@ -336,12 +338,12 @@ impl<VM: VMBinding> WorkerGroup<VM> {
         );
         let mut state = self.state.lock().unwrap();
 
-        let WorkerCreationState::Resting { ref mut suspended_workers } = *state else {
+        let WorkerCreationState::Resting { ref mut workers } = *state else {
             panic!("GCWorker structs have not been created, yet.");
         };
 
         // Drain the queue. We transfer the ownership of each `GCWorker` instance to a GC thread.
-        for worker in suspended_workers.drain(..) {
+        for worker in workers.drain(..) {
             VM::VMCollection::spawn_gc_thread(tls, GCThreadContext::<VM>::Worker(worker));
         }
 
@@ -358,26 +360,26 @@ impl<VM: VMBinding> WorkerGroup<VM> {
     pub fn prepare_surrender_buffer(&self) {
         let mut state = self.state.lock().unwrap();
         *state = WorkerCreationState::Resting {
-            suspended_workers: Vec::with_capacity(self.worker_count()),
+            workers: Vec::with_capacity(self.worker_count()),
         }
     }
 
     /// Return the `GCWorker` struct to the worker group.
     /// This function returns `true` if all workers returned their `GCWorker` structs.
-    pub fn return_gc_worker_struct(&self, worker: Box<GCWorker<VM>>) -> bool {
+    pub fn surrender_gc_worker(&self, worker: Box<GCWorker<VM>>) -> bool {
         let mut state = self.state.lock().unwrap();
-        let WorkerCreationState::Resting { ref mut suspended_workers } = *state else {
+        let WorkerCreationState::Resting { ref mut workers } = *state else {
             panic!("GCWorker structs have not been created, yet.");
        };
         let ordinal = worker.ordinal;
-        suspended_workers.push(worker);
+        workers.push(worker);
         trace!(
             "Worker {} surrendered.
({}/{})", ordinal, - suspended_workers.len(), + workers.len(), self.worker_count() ); - suspended_workers.len() == self.worker_count() + workers.len() == self.worker_count() } /// Get the number of workers in the group From 351d9821b4d57b86f9dade0d8cd7b47c07edbb4e Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 21 Mar 2024 13:16:49 +0800 Subject: [PATCH 36/48] Align GCWorkers with GCWorkerShared The index of `worker_group.workers_shared` should represent the ordinal --- src/scheduler/worker.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index bb3dd83c36..1a7c7b4a7f 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -282,23 +282,21 @@ unsafe impl Sync for WorkerGroup {} impl WorkerGroup { /// Create a WorkerGroup pub fn new(num_workers: usize) -> Arc { - let unspawned_local_work_queues = (0..num_workers) + let local_work_queues = (0..num_workers) .map(|_| deque::Worker::new_fifo()) .collect::>(); let workers_shared = (0..num_workers) .map(|i| { Arc::new(GCWorkerShared::::new(Some( - unspawned_local_work_queues[i].stealer(), + local_work_queues[i].stealer(), ))) }) .collect::>(); Arc::new(Self { workers_shared, - state: Mutex::new(WorkerCreationState::NotCreated { - local_work_queues: unspawned_local_work_queues, - }), + state: Mutex::new(WorkerCreationState::NotCreated { local_work_queues }), }) } @@ -312,19 +310,21 @@ impl WorkerGroup { }; assert_eq!(self.workers_shared.len(), local_work_queues.len()); - let mut workers = Vec::with_capacity(self.workers_shared.len()); // Spawn each worker thread. - for (ordinal, shared) in self.workers_shared.iter().enumerate() { - let worker = Box::new(GCWorker::new( - mmtk, - ordinal, - mmtk.scheduler.clone(), - shared.clone(), - local_work_queues.pop().unwrap(), - )); - workers.push(worker); - } + let workers = (local_work_queues.drain(..)) + .zip(self.workers_shared.iter()) + .enumerate() + .map(|(ordinal, (queue, shared))| { + Box::new(GCWorker::new( + mmtk, + ordinal, + mmtk.scheduler.clone(), + shared.clone(), + queue, + )) + }) + .collect::>(); *state = WorkerCreationState::Resting { workers }; debug!("GCWorker instances created."); From 9f743a89baf32684551d3b4e8017ed152f1790e1 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 22 Mar 2024 09:24:25 +0800 Subject: [PATCH 37/48] Remove comment sentence It is not our intention to require calling `initialize_collection` before any allocation. --- src/mmtk.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/mmtk.rs b/src/mmtk.rs index 24339543b4..4aa9bd2d7b 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -222,8 +222,7 @@ impl MMTK { /// Initialize the GC worker threads that are required for doing garbage collections. /// This is a mandatory call for a VM during its boot process once its thread system - /// is ready. This function should be called exactly once, and should be called before - /// allocating objects in the MMTk heap. + /// is ready. This function should be called exactly once. /// /// Internally, this function will spawn the GC worker threads, and the `tls` argument is used /// for this purpose. From fd2f9c3d9fccd0c4b8bcc056c34f158c790f8c52 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 25 Mar 2024 13:53:47 +0800 Subject: [PATCH 38/48] Adjust eBPF trace points and their docs. 
---
 src/mmtk.rs                   | 2 ++
 src/scheduler/worker_goals.rs | 2 ++
 tools/tracing/README.md       | 6 ++++--
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mmtk.rs b/src/mmtk.rs
index 4aa9bd2d7b..60b1475e69 100644
--- a/src/mmtk.rs
+++ b/src/mmtk.rs
@@ -283,6 +283,7 @@ impl<VM: VMBinding> MMTK<VM> {
             self.state.is_initialized(),
             "MMTk collection has not been initialized, yet (was initialize_collection() called before?)"
         );
+        probe!(mmtk, prepare_to_fork);
         self.scheduler.stop_gc_threads_for_forking();
     }
 
@@ -300,6 +301,7 @@ impl<VM: VMBinding> MMTK<VM> {
             self.state.is_initialized(),
             "MMTk collection has not been initialized, yet (was initialize_collection() called before?)"
         );
+        probe!(mmtk, after_fork);
         self.scheduler.respawn_gc_threads_after_forking(tls);
     }
 
diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs
index 3bf367d8cf..009d3ce563 100644
--- a/src/scheduler/worker_goals.rs
+++ b/src/scheduler/worker_goals.rs
@@ -54,6 +54,7 @@ impl WorkerGoals {
             if *requested {
                 *requested = false;
                 self.current = Some(goal);
+                probe!(mmtk, goal_set, goal);
                 return Some(goal);
             }
         }
@@ -67,6 +68,7 @@ impl WorkerGoals {
 
     /// Called when the current goal is completed. This will clear the current goal.
     pub fn on_current_goal_completed(&mut self) {
+        probe!(mmtk, goal_complete);
         self.current = None
     }
 
diff --git a/tools/tracing/README.md b/tools/tracing/README.md
index 96ff3e6a6d..0dc05b3210 100644
--- a/tools/tracing/README.md
+++ b/tools/tracing/README.md
@@ -17,8 +17,10 @@ shipped with the MMTk release you use.
 Currently, the core provides the following tracepoints.
 
 - `mmtk:collection_initialized()`: All GC worker threads are spawned
-- `mmtk:collection_uninitialized()`: All GC worker threads exited. This may happend during
-  execution, for example, if the VM needs to fork and requests GC workers to exit.
+- `mmtk:prepare_to_fork()`: The VM requests MMTk core to prepare for calling `fork()`.
+- `mmtk:after_fork()`: The VM notifies MMTk core that it has finished calling `fork()`.
+- `mmtk:goal_set(goal: int)`: GC workers have started working on a goal.
+- `mmtk:goal_complete(goal: int)`: GC workers have finished working on a goal.
 - `mmtk:harness_begin()`: the timing iteration of a benchmark begins
 - `mmtk:harness_end()`: the timing iteration of a benchmark ends
 - `mmtk:gcworker_run()`: a GC worker thread enters its work loop

From 991314ac56eaef035f71f37a8f30c983225a3564 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Mon, 25 Mar 2024 14:30:20 +0800
Subject: [PATCH 39/48] Update comments and clean up simple wrapper functions

This commit also does some minor refactoring so that simple wrapper
functions in `memory_manager` do not duplicate the comments of the
functions they wrap.

---
 src/memory_manager.rs         | 23 +++--------------------
 src/mmtk.rs                   | 10 ++++++----
 src/scheduler/scheduler.rs    |  3 ++-
 src/scheduler/worker.rs       | 19 +++++++++++++++----
 src/util/test_util/mock_vm.rs |  2 +-
 src/vm/scanning.rs            |  2 +-
 6 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/src/memory_manager.rs b/src/memory_manager.rs
index 7842acd2fa..cabaa93ead 100644
--- a/src/memory_manager.rs
+++ b/src/memory_manager.rs
@@ -461,33 +461,16 @@ pub fn gc_poll<VM: VMBinding>(mmtk: &MMTK<VM>, tls: VMMutatorThread) {
     }
 }
 
-/// Run the main loop of a GC worker.
-///
-/// This method runs until the worker exits. Currently a worker may exit after
-/// [`crate::mmtk::MMTK::prepare_to_fork`] is called.
-///
-/// Arguments:
-/// * `tls`: The thread that will be used as the GC worker.
-/// * `worker`: The execution context of the GC worker thread.
-/// It is the `GCWorker` passed to `Collection::spawn_gc_thread`. -/// * `mmtk`: A reference to an MMTk instance. +/// Wrapper for [`crate::scheduler::GCWorker::run`]. pub fn start_worker( mmtk: &'static MMTK, tls: VMWorkerThread, - mut worker: Box>, + worker: Box>, ) { worker.run(tls, mmtk); - mmtk.scheduler.surrender_gc_worker(worker); } -/// Initialize the GC worker threads that are required for doing garbage collections. -/// This is a mandatory call for a VM during its boot process once its thread system -/// is ready. This call will invoke `Collection::spawn_gc_thread()`` to create GC threads. -/// -/// Arguments: -/// * `mmtk`: A reference to an MMTk instance. -/// * `tls`: The thread that wants to enable the collection. This value will be passed back to -/// the VM in `Collection::spawn_gc_thread()` so that the VM knows the context. +/// Wrapper for [`crate::mmtk::MMTK::initialize_collection`]. pub fn initialize_collection(mmtk: &'static MMTK, tls: VMThread) { mmtk.initialize_collection(tls); } diff --git a/src/mmtk.rs b/src/mmtk.rs index 60b1475e69..86c9973954 100644 --- a/src/mmtk.rs +++ b/src/mmtk.rs @@ -222,15 +222,17 @@ impl MMTK { /// Initialize the GC worker threads that are required for doing garbage collections. /// This is a mandatory call for a VM during its boot process once its thread system - /// is ready. This function should be called exactly once. + /// is ready. /// - /// Internally, this function will spawn the GC worker threads, and the `tls` argument is used - /// for this purpose. + /// Internally, this function will invoke [`Collection::spawn_gc_thread()`] to spawn GC worker + /// threads. /// /// # Arguments /// /// * `tls`: The thread that wants to enable the collection. This value will be passed back - /// to the VM in `Collection::spawn_gc_thread()` so that the VM knows the context. + /// to the VM in [`Collection::spawn_gc_thread()`] so that the VM knows the context. + /// + /// [`Collection::spawn_gc_thread()`]: crate::vm::Collection::spawn_gc_thread() pub fn initialize_collection(&'static self, tls: VMThread) { assert!( !self.state.is_initialized(), diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 42872d4931..5d72484d5c 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -116,7 +116,8 @@ impl GCWorkScheduler { } /// Respawn GC threads after forking. This will reuse the `GCWorker` instances of stopped - /// workers. + /// workers. `tls` is the VM thread that requests GC threads to be re-spawn, and will be + /// passed down to [`crate::vm::Collection::spawn_gc_thread`]. pub fn respawn_gc_threads_after_forking(self: &Arc, tls: VMThread) { self.worker_group.spawn(tls) } diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 1a7c7b4a7f..d3b063c820 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -193,9 +193,18 @@ impl GCWorker { self.scheduler().poll(self) } - /// Entry of the worker thread. Resolve thread affinity, if it has been specified by the user. - /// Each worker will keep polling and executing work packets in a loop. - pub fn run(&mut self, tls: VMWorkerThread, mmtk: &'static MMTK) { + /// Entry point of the worker thread. + /// + /// This function will resolve thread affinity, if it has been specified by the user. + /// + /// Each worker will keep polling and executing work packets in a loop. It runs until the + /// worker is requested to exit. Currently a worker may exit after + /// [`crate::mmtk::MMTK::prepare_to_fork`] is called. 
+    ///
+    /// Arguments:
+    /// * `tls`: The VM-specific thread-local storage for this GC worker thread.
+    /// * `mmtk`: A reference to an MMTk instance.
+    pub fn run(mut self: Box<Self>, tls: VMWorkerThread, mmtk: &'static MMTK<VM>) {
         probe!(mmtk, gcworker_run);
         debug!(
             "Worker started. ordinal: {}, {}",
@@ -231,7 +240,7 @@ impl<VM: VMBinding> GCWorker<VM> {
             std::hint::black_box(unsafe { *(typename.as_ptr()) });
 
             probe!(mmtk, work, typename.as_ptr(), typename.len());
-            work.do_work_with_stat(self, mmtk);
+            work.do_work_with_stat(&mut self, mmtk);
         }
         debug!(
             "Worker exiting. ordinal: {}, {}",
@@ -239,6 +248,8 @@ impl<VM: VMBinding> GCWorker<VM> {
             crate::util::rust_util::debug_process_thread_id(),
         );
         probe!(mmtk, gcworker_exit);
+
+        mmtk.scheduler.surrender_gc_worker(self);
     }
 }
 
diff --git a/src/util/test_util/mock_vm.rs b/src/util/test_util/mock_vm.rs
index 4ac620ac80..9f5d0c7122 100644
--- a/src/util/test_util/mock_vm.rs
+++ b/src/util/test_util/mock_vm.rs
@@ -82,7 +82,7 @@ where
     func(&mut lock)
 }
 
-/// A test that uses `MockVM`` should use this method to wrap the entire test
+/// A test that uses `MockVM` should use this method to wrap the entire test
 /// that may use `MockVM`.
 ///
 /// # Arguments
diff --git a/src/vm/scanning.rs b/src/vm/scanning.rs
index 21768f4337..df6a0c6607 100644
--- a/src/vm/scanning.rs
+++ b/src/vm/scanning.rs
@@ -124,7 +124,7 @@ pub trait RootsWorkFactory<ES: Edge>: Clone + Send + 'static {
     /// Create work packets to handle transitively pinning (TP) roots.
     ///
     /// Similar to `create_process_pinning_roots_work`, this work packet will not move objects in `nodes`.
-    /// Unlike ``create_process_pinning_roots_work`, no objects in the transitive closure of `nodes` will be moved, either.
+    /// Unlike `create_process_pinning_roots_work`, no objects in the transitive closure of `nodes` will be moved, either.
     ///
     /// Arguments:
     /// * `nodes`: A vector of references to objects pointed by root edges.

From eaa00bfcacaa07a5e8c92f1303aeeb58b57d4639 Mon Sep 17 00:00:00 2001
From: Kunshan Wang
Date: Mon, 25 Mar 2024 16:07:00 +0800
Subject: [PATCH 40/48] Refactored state transition.

When spawning GC workers for the first time, the worker group
transitions directly from the `Initial` state to the `Spawned` state.

---
 src/scheduler/scheduler.rs |  5 +-
 src/scheduler/worker.rs    | 95 +++++++++++++++++++++++---------------
 2 files changed, 61 insertions(+), 39 deletions(-)

diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs
index 5d72484d5c..91c2d86fac 100644
--- a/src/scheduler/scheduler.rs
+++ b/src/scheduler/scheduler.rs
@@ -90,8 +90,7 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
     /// Currently GC threads only include worker threads, and we currently have only one worker
     /// group. We may add more worker groups in the future.
     pub fn spawn_gc_threads(self: &Arc<Self>, mmtk: &'static MMTK<VM>, tls: VMThread) {
-        self.worker_group.create_workers(mmtk);
-        self.worker_group.spawn(tls)
+        self.worker_group.initial_spawn(tls, mmtk);
     }
 
     /// Ask all GC workers to exit for forking.
@@ -119,7 +118,7 @@ impl<VM: VMBinding> GCWorkScheduler<VM> {
     /// workers. `tls` is the VM thread that requests GC threads to be re-spawn, and will be
     /// passed down to [`crate::vm::Collection::spawn_gc_thread`].
     pub fn respawn_gc_threads_after_forking(self: &Arc<Self>, tls: VMThread) {
-        self.worker_group.spawn(tls)
+        self.worker_group.respawn(tls)
     }
 
     /// Resolve the affinity of a thread.
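
The worker-group lifecycle after this change can be summarized by the
following self-contained sketch. It is an illustration only: `State` and
`Event` mirror `WorkerCreationState` and the `WorkerGroup` methods named in
this patch, but the `GCWorker` payloads, locking, and the `Option` used
during transitions are elided.

    #[derive(Debug, PartialEq)]
    enum State {
        Initial,     // no `GCWorker` structs, no threads
        Spawned,     // threads running; they own their `GCWorker` structs
        Surrendered, // threads stopped for forking; `GCWorker` structs pooled
    }

    #[derive(Debug)]
    enum Event {
        InitialSpawn, // `WorkerGroup::initial_spawn`
        Surrender,    // all threads returned via `surrender_gc_worker`
        Respawn,      // `WorkerGroup::respawn`
    }

    fn transition(state: State, event: Event) -> State {
        match (state, event) {
            (State::Initial, Event::InitialSpawn) => State::Spawned,
            (State::Spawned, Event::Surrender) => State::Surrendered,
            (State::Surrendered, Event::Respawn) => State::Spawned,
            (s, e) => panic!("invalid transition: {:?} on {:?}", s, e),
        }
    }

    fn main() {
        let s = transition(State::Initial, Event::InitialSpawn);
        let s = transition(s, Event::Surrender);
        assert_eq!(transition(s, Event::Respawn), State::Spawned);
    }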
diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs
index d3b063c820..5105799acb 100644
--- a/src/scheduler/worker.rs
+++ b/src/scheduler/worker.rs
@@ -253,41 +253,41 @@ impl<VM: VMBinding> GCWorker<VM> {
     }
 }
 
-/// Data that are owned by a worker group but depend on whether the `GCWorker` structs and the
-/// actual GC worker threads have been created or not.
+/// Stateful part of [`WorkerGroup`].
 enum WorkerCreationState<VM: VMBinding> {
-    /// `GCWorker` structs have not been created yet.
-    NotCreated {
+    /// The initial state. `GCWorker` structs have not been created and GC worker threads have
+    /// not been spawned.
+    Initial {
         /// The local work queues for to-be-created workers.
         local_work_queues: Vec<deque::Worker<Box<dyn GCWork<VM>>>>,
     },
-    /// `GCWorker` structs are created, but worker threads are either not yet spawned, or in the
-    /// process of stopping for forking.
-    Resting {
+    /// All worker threads are spawned and running. `GCWorker` structs have been transferred to
+    /// worker threads.
+    Spawned,
+    /// Worker threads are stopping, or have already stopped, for forking. Instances of `GCWorker`
+    /// structs are collected here to be reused when GC workers are respawned.
+    Surrendered {
         /// `GCWorker` instances not currently owned by active GC worker threads. Once GC workers
-        /// are (re-)spawned, they will take ownership of these `GCWorker` instances.
+        /// are respawned, they will take ownership of these `GCWorker` instances.
         // Note: Clippy warns about `Vec<Box<T>>` because `Box<T>` is already in the heap.
         // However, the purpose of this `Vec` is allowing GC worker threads to give their
         // `Box<GCWorker<VM>>` instances back to this pool. Therefore, the `Box` is necessary.
         #[allow(clippy::vec_box)]
         workers: Vec<Box<GCWorker<VM>>>,
     },
-    /// All worker threads are spawned and running. `GCWorker` structs have been transferred to
-    /// worker threads.
-    Spawned,
 }
 
 /// A worker group to manage all the GC workers.
 pub(crate) struct WorkerGroup<VM: VMBinding> {
     /// Shared worker data
     pub workers_shared: Vec<Arc<GCWorkerShared<VM>>>,
-    /// The stateful part
-    state: Mutex<WorkerCreationState<VM>>,
+    /// The stateful part. `None` means a state transition is underway.
+    state: Mutex<Option<WorkerCreationState<VM>>>,
 }
 
 /// We have to persuade Rust that `WorkerGroup` is safe to share because the compiler thinks one
 /// worker can refer to another worker via the path "worker -> scheduler -> worker_group ->
-/// `Resting::workers` -> worker", which is a cyclic reference and therefore unsafe.
+/// `Surrendered::workers` -> worker", which is a cyclic reference and therefore unsafe.
 unsafe impl<VM: VMBinding> Sync for WorkerGroup<VM> {}
 
 impl<VM: VMBinding> WorkerGroup<VM> {
@@ -307,23 +307,50 @@ impl<VM: VMBinding> WorkerGroup<VM> {
 
         Arc::new(Self {
             workers_shared,
-            state: Mutex::new(WorkerCreationState::NotCreated { local_work_queues }),
+            state: Mutex::new(Some(WorkerCreationState::Initial { local_work_queues })),
         })
     }
 
-    /// Create `GCWorker` instances, but do not create threads, yet.
-    pub fn create_workers(&self, mmtk: &'static MMTK<VM>) {
-        debug!("Creating GCWorker instances...");
+    /// Spawn GC worker threads for the first time.
+    pub fn initial_spawn(&self, tls: VMThread, mmtk: &'static MMTK<VM>) {
         let mut state = self.state.lock().unwrap();
 
-        let WorkerCreationState::NotCreated { ref mut local_work_queues } = *state else {
+        let WorkerCreationState::Initial { local_work_queues } = state.take().unwrap() else {
             panic!("GCWorker structs have already been created");
         };
 
+        let workers = self.create_workers(local_work_queues, mmtk);
+        self.spawn(workers, tls);
+
+        *state = Some(WorkerCreationState::Spawned);
+    }
+
+    /// Respawn GC threads after stopping for forking.
+ pub fn respawn(&self, tls: VMThread) { + let mut state = self.state.lock().unwrap(); + + let WorkerCreationState::Surrendered { workers } = state.take().unwrap() else { + panic!("GCWorker structs have not been created, yet."); + }; + + self.spawn(workers, tls); + + *state = Some(WorkerCreationState::Spawned) + } + + /// Create `GCWorker` instances. + #[allow(clippy::vec_box)] // See `WorkerCreationState::Surrendered`. + fn create_workers( + &self, + local_work_queues: Vec>>>, + mmtk: &'static MMTK, + ) -> Vec>> { + debug!("Creating GCWorker instances..."); + assert_eq!(self.workers_shared.len(), local_work_queues.len()); - // Spawn each worker thread. - let workers = (local_work_queues.drain(..)) + // Each `GCWorker` instance corresponds to a `GCWorkerShared` at the same index. + let workers = (local_work_queues.into_iter()) .zip(self.workers_shared.iter()) .enumerate() .map(|(ordinal, (queue, shared))| { @@ -337,29 +364,23 @@ impl WorkerGroup { }) .collect::>(); - *state = WorkerCreationState::Resting { workers }; - debug!("GCWorker instances created."); + debug!("Created {} GCWorker instances.", workers.len()); + workers } /// Spawn all the worker threads - pub fn spawn(&self, tls: VMThread) { + #[allow(clippy::vec_box)] // See `WorkerCreationState::Surrendered`. + fn spawn(&self, workers: Vec>>, tls: VMThread) { debug!( "Spawning GC workers. {}", crate::util::rust_util::debug_process_thread_id(), ); - let mut state = self.state.lock().unwrap(); - let WorkerCreationState::Resting { ref mut workers } = *state else { - panic!("GCWorker structs have not been created, yet."); - }; - - // Drain the queue. We transfer the ownership of each `GCWorker` instance to a GC thread. - for worker in workers.drain(..) { + // We transfer the ownership of each `GCWorker` instance to a GC thread. + for worker in workers { VM::VMCollection::spawn_gc_thread(tls, GCThreadContext::::Worker(worker)); } - *state = WorkerCreationState::Spawned; - debug!( "Spawned {} worker threads. {}", self.worker_count(), @@ -370,16 +391,18 @@ impl WorkerGroup { /// Prepare the buffer for workers to surrender their `GCWorker` structs. pub fn prepare_surrender_buffer(&self) { let mut state = self.state.lock().unwrap(); - *state = WorkerCreationState::Resting { + assert!(matches!(*state, Some(WorkerCreationState::Spawned))); + + *state = Some(WorkerCreationState::Surrendered { workers: Vec::with_capacity(self.worker_count()), - } + }) } /// Return the `GCWorker` struct to the worker group. /// This function returns `true` if all workers returned their `GCWorker` structs. 
pub fn surrender_gc_worker(&self, worker: Box>) -> bool { let mut state = self.state.lock().unwrap(); - let WorkerCreationState::Resting { ref mut workers } = *state else { + let WorkerCreationState::Surrendered { ref mut workers } = state.as_mut().unwrap() else { panic!("GCWorker structs have not been created, yet."); }; let ordinal = worker.ordinal; From b791aa7c7384088906cf2fae64d813cf4c51e065 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 25 Mar 2024 16:47:00 +0800 Subject: [PATCH 41/48] Minor changes --- src/scheduler/scheduler.rs | 17 ++++++----------- src/scheduler/worker_goals.rs | 2 +- src/scheduler/worker_monitor.rs | 18 +++++++++++++++--- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 91c2d86fac..f11351b76f 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -1,4 +1,4 @@ -use self::worker::{PollResult, WorkerShouldExit}; +use self::worker::PollResult; use super::gc_work::ScheduleCollection; use super::stat::SchedulerStat; @@ -402,18 +402,13 @@ impl GCWorkScheduler { return Ok(work); } - let should_exit = self - .worker_monitor - .park_and_wait(worker, |goals| self.on_last_parked(worker, goals)); - - if should_exit { - return Err(WorkerShouldExit); - } + self.worker_monitor + .park_and_wait(worker, |goals| self.on_last_parked(worker, goals))?; } } - /// Called when the last worker parked. - /// `should_schedule_gc` is true if a mutator requested a GC. + /// Called when the last worker parked. `goal` allows this function to inspect and change the + /// current goal. fn on_last_parked(&self, worker: &GCWorker, goals: &mut WorkerGoals) -> LastParkedResult { let Some(ref current_goal) = goals.current() else { // There is no goal. Find a request to respond to. @@ -469,7 +464,7 @@ impl GCWorkScheduler { assert!(goals.current().is_none()); let Some(goal) = goals.poll_next_goal() else { - // No reqeusts. Park this worker, too. + // No requests. Park this worker, too. return LastParkedResult::ParkSelf; }; diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs index 009d3ce563..eea70f5924 100644 --- a/src/scheduler/worker_goals.rs +++ b/src/scheduler/worker_goals.rs @@ -2,7 +2,7 @@ //! working towards on a high level. //! //! A "goal" is represented by a `WorkerGoal`. All workers work towards a single goal at a time. -//! THe current goal influences the behavior of GC workers, especially the last parked worker. +//! The current goal influences the behavior of GC workers, especially the last parked worker. //! For example, //! //! - When in the progress of GC, the last parker will try to open buckets or announce the GC diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs index 70aabce4e4..6694a2f404 100644 --- a/src/scheduler/worker_monitor.rs +++ b/src/scheduler/worker_monitor.rs @@ -9,6 +9,7 @@ use std::sync::{Condvar, Mutex}; use crate::vm::VMBinding; use super::{ + worker::WorkerShouldExit, worker_goals::{WorkerGoal, WorkerGoals}, GCWorker, }; @@ -123,7 +124,14 @@ impl WorkerMonitor { /// The argument of `on_last_parked` is true if `sync.gc_requested` is `true`. /// The return value of `on_last_parked` will determine whether this worker and other workers /// will wake up or block waiting. - pub fn park_and_wait(&self, worker: &GCWorker, on_last_parked: F) -> bool + /// + /// This function returns `Ok(())` if the current worker should continue working, + /// or `Err(WorkerShouldExit)` if the current worker should exit now. 
+ pub fn park_and_wait( + &self, + worker: &GCWorker, + on_last_parked: F, + ) -> Result<(), WorkerShouldExit> where VM: VMBinding, F: FnOnce(&mut WorkerGoals) -> LastParkedResult, @@ -226,8 +234,12 @@ impl WorkerMonitor { sync.parker.worker_count, ); - // If the current goal is `StopForFork`, return true so that the worker thread will exit. - matches!(sync.goals.current(), Some(WorkerGoal::StopForFork)) + // If the current goal is `StopForFork`, the worker thread should exit. + if matches!(sync.goals.current(), Some(WorkerGoal::StopForFork)) { + return Err(WorkerShouldExit); + } + + Ok(()) } /// Called when all workers have exited. From f049f73ebe8e7ee6bbfdad13576455a09e1b5f68 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 1 Apr 2024 19:59:56 +0800 Subject: [PATCH 42/48] Add a mock test for forking support. --- .../tests/mock_tests/mock_test_init_fork.rs | 197 ++++++++++++++++++ src/vm/tests/mock_tests/mod.rs | 1 + 2 files changed, 198 insertions(+) create mode 100644 src/vm/tests/mock_tests/mock_test_init_fork.rs diff --git a/src/vm/tests/mock_tests/mock_test_init_fork.rs b/src/vm/tests/mock_tests/mock_test_init_fork.rs new file mode 100644 index 0000000000..589348bfb7 --- /dev/null +++ b/src/vm/tests/mock_tests/mock_test_init_fork.rs @@ -0,0 +1,197 @@ +use std::{ + sync::{Condvar, Mutex, MutexGuard}, + thread::JoinHandle, + time::Duration, +}; + +use super::mock_test_prelude::*; +use crate::{ + util::{options::GCTriggerSelector, Address, OpaquePointer, VMThread, VMWorkerThread}, + MMTKBuilder, MMTK, +}; + +#[derive(Default)] +struct ForkTestShared { + sync: Mutex, + all_threads_spawn: Condvar, + all_threads_exited: Condvar, + all_threads_running: Condvar, +} + +#[derive(Default)] +struct ForkTestSync { + join_handles: Vec>, + /// Number of threads spawn. + spawn_threds: usize, + /// Number of threads that have actually entered our entry-point function. + running_threads: usize, + /// Number of threads that have returned from `memory_manager::start_worker`. + exited_threads: usize, +} + +lazy_static! { + static ref SHARED: ForkTestShared = ForkTestShared::default(); +} + +// We fix the number of threads so that we can assert the number of GC threads spawn. +const NUM_WORKER_THREADS: usize = 4; + +// Don't block the CI. +const TIMEOUT: Duration = Duration::from_secs(5); + +/// A convenient wrapper that panics on timeout. +fn wait_timeout_while<'a, 'c, T, F>( + guard: MutexGuard<'a, T>, + condvar: &'c Condvar, + condition: F, +) -> MutexGuard<'a, T> +where + F: FnMut(&mut T) -> bool, +{ + let (guard, timeout_result) = condvar + .wait_timeout_while(guard, TIMEOUT, condition) + .unwrap(); + assert!(!timeout_result.timed_out()); + guard +} + +fn simple_spawn_gc_thread( + _vm_thread: VMThread, + context: GCThreadContext, + mmtk: &'static MMTK, +) { + let GCThreadContext::Worker(worker) = context; + let join_handle = std::thread::spawn(move || { + let ordinal = worker.ordinal; + println!("GC thread starting. 
Ordinal: {ordinal}");
+
+        {
+            let mut sync = SHARED.sync.lock().unwrap();
+            sync.running_threads += 1;
+            if sync.running_threads == NUM_WORKER_THREADS {
+                SHARED.all_threads_running.notify_all();
+            }
+        }
+
+        let gc_thread_tls = VMWorkerThread(VMThread(OpaquePointer::from_address(Address::ZERO)));
+        memory_manager::start_worker(mmtk, gc_thread_tls, worker);
+
+        {
+            let mut sync = SHARED.sync.lock().unwrap();
+            sync.running_threads -= 1;
+            sync.exited_threads += 1;
+            if sync.exited_threads == NUM_WORKER_THREADS {
+                SHARED.all_threads_exited.notify_all();
+            }
+        }
+
+        println!("GC thread stopped. Ordinal: {ordinal}");
+    });
+
+    {
+        let mut sync = SHARED.sync.lock().unwrap();
+        sync.join_handles.push(join_handle);
+        sync.spawn_threds += 1;
+        if sync.spawn_threds == NUM_WORKER_THREADS {
+            SHARED.all_threads_spawn.notify_all();
+        }
+    }
+}
+
+/// Test the `initial_collection` function with actual running GC threads, and the functions for
+/// supporting forking.
+#[test]
+pub fn test_initialize_collection_and_fork() {
+    // We don't use fixtures or `with_mockvm` because we want to precisely control the
+    // initialization process.
+    let mut builder = MMTKBuilder::new();
+    // The exact heap size doesn't matter because we don't even allocate.
+    let trigger = GCTriggerSelector::FixedHeapSize(1024 * 1024);
+    builder.options.gc_trigger.set(trigger);
+    builder.options.threads.set(NUM_WORKER_THREADS);
+    let mmtk: &'static mut MMTK<MockVM> = Box::leak(Box::new(builder.build::<MockVM>()));
+
+    let mock_vm = MockVM {
+        spawn_gc_thread: MockMethod::new_fixed(Box::new(|(vm_thread, context)| {
+            simple_spawn_gc_thread(vm_thread, context, mmtk)
+        })),
+        ..Default::default()
+    };
+    write_mockvm(move |mock_vm_ref| *mock_vm_ref = mock_vm);
+
+    let test_thread_tls = VMThread(OpaquePointer::from_address(Address::ZERO));
+
+    // Initialize collection. This will spawn GC worker threads.
+    mmtk.initialize_collection(test_thread_tls);
+
+    // Wait for GC workers to be spawned, and get their join handles.
+    let join_handles = {
+        println!("Waiting for GC worker threads to be spawned");
+        let sync = SHARED.sync.lock().unwrap();
+
+        // We wait for `all_threads_spawn` instead of `all_threads_running`. It is not necessary
+        // to wait for all GC worker threads to enter `memory_manager::start_worker` before
+        // calling `prepare_to_fork`. The implementation of `initialize_collection` and
+        // `prepare_to_fork` should be robust against GC worker threads that start slower than
+        // usual.
+        let mut sync = wait_timeout_while(sync, &SHARED.all_threads_spawn, |sync| {
+            sync.spawn_threds < NUM_WORKER_THREADS
+        });
+
+        // Take join handles out of `SHARED.sync` so that the main thread can join them without
+        // holding the Mutex, and GC workers can acquire the mutex and mutate `SHARED.sync`.
+        std::mem::take(&mut sync.join_handles)
+    };
+
+    assert_eq!(join_handles.len(), NUM_WORKER_THREADS);
+
+    // Now we prepare to fork. GC worker threads should go down.
+    mmtk.prepare_to_fork();
+
+    println!("Waiting for GC worker threads to stop");
+
+    {
+        // In theory, we can just join the join handles, and it is unnecessary to wait for
+        // `SHARED.all_threads_exited`. This is a workaround for the fact that
+        // `JoinHandle::join()` does not have a variant that supports timeout. We use
+        // `wait_timeout_while` so that it won't hang the CI. When the condvar
+        // `all_threads_exited` is notified, all GC workers will have returned from
+        // `memory_manager::start_worker`, and it is unlikely that the `join_handle.join()` below
+        // will block for too long.
+ let sync = SHARED.sync.lock().unwrap(); + let _sync = wait_timeout_while(sync, &SHARED.all_threads_exited, |sync| { + sync.exited_threads < NUM_WORKER_THREADS + }); + } + + for join_handle in join_handles { + // TODO: PThread has `pthread_timedjoin_np`, but the `JoinHandle` in the Rust standard + // library doesn't have a variant that supports timeout. Let's wait for the Rust library + // to update. + join_handle.join().unwrap(); + } + + println!("All GC worker threads stopped"); + + { + let mut sync = SHARED.sync.lock().unwrap(); + assert_eq!(sync.running_threads, 0); + + // Reset counters. + sync.spawn_threds = 0; + sync.exited_threads = 0; + } + + // We don't actually call `fork()` in this test, but we pretend we have called `fork()`. + // We now try to resume GC workers. + mmtk.after_fork(test_thread_tls); + + { + println!("Waiting for GC worker threads to be running after calling `after_fork`"); + let sync = SHARED.sync.lock().unwrap(); + let _sync = wait_timeout_while(sync, &SHARED.all_threads_running, |sync| { + sync.running_threads < NUM_WORKER_THREADS + }); + } + + println!("All GC worker threads are up and running."); +} diff --git a/src/vm/tests/mock_tests/mod.rs b/src/vm/tests/mock_tests/mod.rs index 51a085cec8..5bf880fbb6 100644 --- a/src/vm/tests/mock_tests/mod.rs +++ b/src/vm/tests/mock_tests/mod.rs @@ -33,6 +33,7 @@ mod mock_test_edges; #[cfg(target_os = "linux")] mod mock_test_handle_mmap_conflict; mod mock_test_handle_mmap_oom; +mod mock_test_init_fork; mod mock_test_is_in_mmtk_spaces; mod mock_test_issue139_allocate_non_multiple_of_min_alignment; mod mock_test_issue867_allocate_unrealistically_large_object; From a6d834ae7a3f088c540e3543fb7e8ff9fbc507b9 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 2 Apr 2024 15:52:51 +0800 Subject: [PATCH 43/48] Fix clippy warning --- src/vm/tests/mock_tests/mock_test_init_fork.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vm/tests/mock_tests/mock_test_init_fork.rs b/src/vm/tests/mock_tests/mock_test_init_fork.rs index 589348bfb7..9605d4c71e 100644 --- a/src/vm/tests/mock_tests/mock_test_init_fork.rs +++ b/src/vm/tests/mock_tests/mock_test_init_fork.rs @@ -40,9 +40,9 @@ const NUM_WORKER_THREADS: usize = 4; const TIMEOUT: Duration = Duration::from_secs(5); /// A convenient wrapper that panics on timeout. -fn wait_timeout_while<'a, 'c, T, F>( +fn wait_timeout_while<'a, T, F>( guard: MutexGuard<'a, T>, - condvar: &'c Condvar, + condvar: &Condvar, condition: F, ) -> MutexGuard<'a, T> where From e9da4c2b763955180e34b324c630211e148ef79f Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 2 Apr 2024 21:18:01 +0800 Subject: [PATCH 44/48] Fix stale comments --- src/scheduler/scheduler.rs | 1 - src/scheduler/worker_monitor.rs | 16 ++++++++-------- src/vm/collection.rs | 10 +++++----- src/vm/tests/mock_tests/mock_test_init_fork.rs | 2 +- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index f11351b76f..836476bebc 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -516,7 +516,6 @@ impl GCWorkScheduler { } /// Called when GC has finished, i.e. when all work packets have been executed. - /// Return `true` if it scheduled the next GC immediately. fn on_gc_finished(&self, worker: &GCWorker) { // All GC workers must have parked by now. 
debug_assert!(!self.worker_group.has_designated_work()); diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs index 6694a2f404..573134ffaf 100644 --- a/src/scheduler/worker_monitor.rs +++ b/src/scheduler/worker_monitor.rs @@ -2,7 +2,7 @@ //! //! - allowing workers to park, //! - letting the last parked worker take action, and -//! - letting workers and mutators to notify workers when workers are given things to do. +//! - letting workers and mutators notify workers when workers are given things to do. use std::sync::{Condvar, Mutex}; @@ -118,7 +118,7 @@ impl WorkerMonitor { } } - /// Park a worker and wait on the CondVar `work_available`. + /// Park a worker and wait on the CondVar `workers_have_anything_to_do`. /// /// If it is the last worker parked, `on_last_parked` will be called. /// The argument of `on_last_parked` is true if `sync.gc_requested` is `true`. @@ -179,10 +179,10 @@ impl WorkerMonitor { // } // unlock(); // - // The actual condition for this `self.work_available.wait(sync)` is: + // The actual condition for this `self.workers_have_anything_to_do.wait(sync)` is: // // 1. any work packet is available, or - // 2. a request for scheduling GC is submitted. + // 2. a goal (such as doing GC) is requested // // But it is not used like the typical use pattern shown above, mainly because work // packets can be added without holding the mutex `self.sync`. This means one worker @@ -200,8 +200,8 @@ impl WorkerMonitor { // available, no other workers can add another work packet (because they all parked). // So the **last** parked worker can open more buckets or declare GC finished. // - // Condition (2), i.e. `sync.should_schedule_gc` is guarded by the mutator `sync`. - // When set (by a mutator via `request_schedule_collection`), it will notify a + // Condition (2), i.e. goals added to `sync.goals`, is guarded by the monitor `sync`. + // When a mutator adds a goal via `WorkerMonitor::make_request`, it will notify a // worker; and the last parked worker always checks it before waiting. So this // condition will not be set without any worker noticing. // @@ -211,8 +211,8 @@ impl WorkerMonitor { // Notes on spurious wake-up: // - // 1. The condition variable `work_available` is guarded by `self.sync`. Because the - // last parked worker is holding the mutex `self.sync` when executing + // 1. The condition variable `workers_have_anything_to_do` is guarded by `self.sync`. + // Because the last parked worker is holding the mutex `self.sync` when executing // `on_last_parked`, no workers can unpark (even if they spuriously wake up) during // `on_last_parked` because they cannot re-acquire the mutex `self.sync`. // diff --git a/src/vm/collection.rs b/src/vm/collection.rs index baf6642f26..16e87eebe0 100644 --- a/src/vm/collection.rs +++ b/src/vm/collection.rs @@ -4,7 +4,7 @@ use crate::util::opaque_pointer::*; use crate::vm::VMBinding; use crate::{scheduler::*, Mutator}; -/// Thread context for the spawned GC thread. It is used by spawn_gc_thread. +/// Thread context for the spawned GC thread. It is used by `spawn_gc_thread`. /// Currently, `GCWorker` is the only kind of thread that mmtk-core will create. pub enum GCThreadContext { /// The GC thread to spawn is a worker thread. There can be multiple worker threads. @@ -46,16 +46,16 @@ pub trait Collection { /// Ask the VM to spawn a GC thread for MMTk. A GC thread may later call into the VM through these VM traits. Some VMs /// have assumptions that those calls needs to be within VM internal threads. 
/// As a result, MMTk does not spawn GC threads itself to avoid breaking this kind of assumptions. - /// MMTk calls this method to spawn GC threads during [`initialize_collection()`](../memory_manager/fn.initialize_collection.html). + /// MMTk calls this method to spawn GC threads during [`crate::mmtk::MMTK::initialize_collection`] + /// and [`crate::mmtk::MMTK::after_fork`]. /// /// Arguments: /// * `tls`: The thread pointer for the parent thread that we spawn new threads from. This is the same `tls` when the VM /// calls `initialize_collection()` and passes as an argument. /// * `ctx`: The context for the GC thread. - /// * If `Worker` is passed, it means spawning a thread to run as a GC worker. - /// The spawned thread shall call `memory_manager::start_worker`. + /// * If [`GCThreadContext::Worker`] is passed, it means spawning a thread to run as a GC worker. + /// The spawned thread shall call the entry point function `GCWorker::run`. /// Currently `Worker` is the only kind of thread which mmtk-core will create. - /// In either case, the `Box` inside should be passed back to the called function. fn spawn_gc_thread(tls: VMThread, ctx: GCThreadContext); /// Inform the VM of an out-of-memory error. The binding should hook into the VM's error diff --git a/src/vm/tests/mock_tests/mock_test_init_fork.rs b/src/vm/tests/mock_tests/mock_test_init_fork.rs index 9605d4c71e..596ce6fe77 100644 --- a/src/vm/tests/mock_tests/mock_test_init_fork.rs +++ b/src/vm/tests/mock_tests/mock_test_init_fork.rs @@ -98,7 +98,7 @@ fn simple_spawn_gc_thread( } } -/// Test the `initial_collection` function with actual running GC threads, and the functions for +/// Test the `initialize_collection` function with actual running GC threads, and the functions for /// supporting forking. #[test] pub fn test_initialize_collection_and_fork() { From d293c04f2a736097dcd92a172db2a1ae157e174d Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Tue, 2 Apr 2024 22:16:11 +0800 Subject: [PATCH 45/48] Added unit tests Also made `park_and_wait` method more testable. --- src/scheduler/scheduler.rs | 3 +- src/scheduler/worker.rs | 1 + src/scheduler/worker_goals.rs | 36 +++++++++++ src/scheduler/worker_monitor.rs | 111 +++++++++++++++++++++++++++++--- 4 files changed, 141 insertions(+), 10 deletions(-) diff --git a/src/scheduler/scheduler.rs b/src/scheduler/scheduler.rs index 836476bebc..bc310bf482 100644 --- a/src/scheduler/scheduler.rs +++ b/src/scheduler/scheduler.rs @@ -402,8 +402,9 @@ impl GCWorkScheduler { return Ok(work); } + let ordinal = worker.ordinal; self.worker_monitor - .park_and_wait(worker, |goals| self.on_last_parked(worker, goals))?; + .park_and_wait(ordinal, |goals| self.on_last_parked(worker, goals))?; } } diff --git a/src/scheduler/worker.rs b/src/scheduler/worker.rs index 5105799acb..1e823c7417 100644 --- a/src/scheduler/worker.rs +++ b/src/scheduler/worker.rs @@ -109,6 +109,7 @@ impl GCWorkerShared { /// A special error type that indicate a worker should exit. /// This may happen if the VM needs to fork and asks workers to exit. +#[derive(Debug)] pub(crate) struct WorkerShouldExit; /// The result type of `GCWorker::pool`. 
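
The reworked signature also makes the exit path a plain error propagation.
The following self-contained sketch (illustrative only; `park_and_wait` and
`poll_once` here are stand-ins for the real methods) shows the pattern that
the scheduler's poll loop now uses with the `?` operator:

    #[derive(Debug)]
    struct WorkerShouldExit;

    fn park_and_wait(asked_to_exit: bool) -> Result<(), WorkerShouldExit> {
        // The real method blocks on a condvar; here we only model the result.
        if asked_to_exit {
            Err(WorkerShouldExit)
        } else {
            Ok(())
        }
    }

    fn poll_once(asked_to_exit: bool) -> Result<&'static str, WorkerShouldExit> {
        // `?` turns `Err(WorkerShouldExit)` into an early return, so the
        // caller's loop ends without a separate boolean flag.
        park_and_wait(asked_to_exit)?;
        Ok("work packet")
    }

    fn main() {
        assert!(poll_once(false).is_ok());
        assert!(poll_once(true).is_err());
    }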
diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs index eea70f5924..d11e4680f6 100644 --- a/src/scheduler/worker_goals.rs +++ b/src/scheduler/worker_goals.rs @@ -78,3 +78,39 @@ impl WorkerGoals { self.requests[goal] } } + +#[cfg(test)] +mod tests { + use super::{WorkerGoal, WorkerGoals}; + + #[test] + fn test_poll_none() { + let mut goals = WorkerGoals::default(); + let next_goal = goals.poll_next_goal(); + + assert!(matches!(next_goal, None)); + assert!(matches!(goals.current(), None)); + } + + #[test] + fn test_poll_one() { + let mut goals = WorkerGoals::default(); + goals.set_request(WorkerGoal::StopForFork); + let next_goal = goals.poll_next_goal(); + + assert!(matches!(next_goal, Some(WorkerGoal::StopForFork))); + assert!(matches!(goals.current(), Some(WorkerGoal::StopForFork))); + } + + #[test] + fn test_goals_priority() { + let mut goals = WorkerGoals::default(); + goals.set_request(WorkerGoal::StopForFork); + goals.set_request(WorkerGoal::Gc); + + let next_goal = goals.poll_next_goal(); + + assert!(matches!(next_goal, Some(WorkerGoal::Gc))); + assert!(matches!(goals.current(), Some(WorkerGoal::Gc))); + } +} diff --git a/src/scheduler/worker_monitor.rs b/src/scheduler/worker_monitor.rs index 573134ffaf..c84dedfcd3 100644 --- a/src/scheduler/worker_monitor.rs +++ b/src/scheduler/worker_monitor.rs @@ -6,12 +6,9 @@ use std::sync::{Condvar, Mutex}; -use crate::vm::VMBinding; - use super::{ worker::WorkerShouldExit, worker_goals::{WorkerGoal, WorkerGoals}, - GCWorker, }; /// The result type of the `on_last_parked` call-back in `WorkMonitor::park_and_wait`. @@ -127,13 +124,12 @@ impl WorkerMonitor { /// /// This function returns `Ok(())` if the current worker should continue working, /// or `Err(WorkerShouldExit)` if the current worker should exit now. - pub fn park_and_wait( + pub fn park_and_wait( &self, - worker: &GCWorker, + ordinal: usize, on_last_parked: F, ) -> Result<(), WorkerShouldExit> where - VM: VMBinding, F: FnOnce(&mut WorkerGoals) -> LastParkedResult, { let mut sync = self.sync.lock().unwrap(); @@ -142,7 +138,7 @@ impl WorkerMonitor { let all_parked = sync.parker.inc_parked_workers(); trace!( "Worker {} parked. parked/total: {}/{}. All parked: {}", - worker.ordinal, + ordinal, sync.parker.parked_workers, sync.parker.worker_count, all_parked @@ -151,7 +147,7 @@ impl WorkerMonitor { let mut should_wait = false; if all_parked { - trace!("Worker {} is the last worker parked.", worker.ordinal); + trace!("Worker {} is the last worker parked.", ordinal); let result = on_last_parked(&mut sync.goals); match result { LastParkedResult::ParkSelf => { @@ -229,7 +225,7 @@ impl WorkerMonitor { sync.parker.dec_parked_workers(); trace!( "Worker {} unparked. parked/total: {}/{}.", - worker.ordinal, + ordinal, sync.parker.parked_workers, sync.parker.worker_count, ); @@ -248,3 +244,100 @@ impl WorkerMonitor { sync.goals.on_current_goal_completed(); } } + +#[cfg(test)] +mod tests { + use std::sync::{ + atomic::{AtomicBool, AtomicUsize, Ordering}, + Arc, + }; + + use super::WorkerMonitor; + + /// Test if the `WorkerMonitor::park_and_wait` method calls the `on_last_parked` callback + /// properly. 
+ #[test] + fn test_last_worker_park_wake_all() { + let number_threads = 4; + let worker_monitor = Arc::new(WorkerMonitor::new(number_threads)); + let on_last_parked_called = AtomicUsize::new(0); + let should_unpark = AtomicBool::new(false); + + std::thread::scope(|scope| { + for ordinal in 0..number_threads { + let worker_monitor = worker_monitor.clone(); + let on_last_parked_called = &on_last_parked_called; + let should_unpark = &should_unpark; + scope.spawn(move || { + // This emulates the use pattern in the scheduler, i.e. checking the condition + // ("Is there any work packets available") without holding a mutex. + while !should_unpark.load(Ordering::SeqCst) { + println!("Thread {} parking...", ordinal); + worker_monitor + .park_and_wait(ordinal, |_goals| { + println!("Thread {} is the last thread parked.", ordinal); + on_last_parked_called.fetch_add(1, Ordering::SeqCst); + should_unpark.store(true, Ordering::SeqCst); + super::LastParkedResult::WakeAll + }) + .unwrap(); + println!("Thread {} unparked.", ordinal); + } + }); + } + }); + + // `on_last_parked` should only be called once. + assert_eq!(on_last_parked_called.load(Ordering::SeqCst), 1); + } + + /// Like `test_last_worker_park_wake_all`, but only wake up the last parked worker when it + /// parked. + #[test] + fn test_last_worker_park_wake_self() { + let number_threads = 4; + let worker_monitor = Arc::new(WorkerMonitor::new(number_threads)); + let on_last_parked_called = AtomicUsize::new(0); + let threads_running = AtomicUsize::new(0); + let should_unpark = AtomicBool::new(false); + + std::thread::scope(|scope| { + for ordinal in 0..number_threads { + let worker_monitor = worker_monitor.clone(); + let on_last_parked_called = &on_last_parked_called; + let threads_running = &threads_running; + let should_unpark = &should_unpark; + scope.spawn(move || { + let mut i_am_the_last_parked_worker = false; + // Record the number of threads entering the following `while` loop. + threads_running.fetch_add(1, Ordering::SeqCst); + while !should_unpark.load(Ordering::SeqCst) { + println!("Thread {} parking...", ordinal); + worker_monitor + .park_and_wait(ordinal, |_goals| { + println!("Thread {} is the last thread parked.", ordinal); + on_last_parked_called.fetch_add(1, Ordering::SeqCst); + should_unpark.store(true, Ordering::SeqCst); + i_am_the_last_parked_worker = true; + super::LastParkedResult::WakeSelf + }) + .unwrap(); + println!("Thread {} unparked.", ordinal); + } + threads_running.fetch_sub(1, Ordering::SeqCst); + + if i_am_the_last_parked_worker { + println!("The last parked worker woke up"); + // Only the current worker should wake and leave the `while` loop above. + assert_eq!(threads_running.load(Ordering::SeqCst), number_threads - 1); + should_unpark.store(true, Ordering::SeqCst); + worker_monitor.notify_work_available(true); + } + }); + } + }); + + // `on_last_parked` should only be called once. 
+ assert_eq!(on_last_parked_called.load(Ordering::SeqCst), 1); + } +} From d9ef6a36f76d24695efabc2ef7aeacc27bda5dbc Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 3 Apr 2024 10:30:00 +0800 Subject: [PATCH 46/48] Style fix --- src/scheduler/worker_goals.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/scheduler/worker_goals.rs b/src/scheduler/worker_goals.rs index d11e4680f6..695471f7b2 100644 --- a/src/scheduler/worker_goals.rs +++ b/src/scheduler/worker_goals.rs @@ -88,8 +88,8 @@ mod tests { let mut goals = WorkerGoals::default(); let next_goal = goals.poll_next_goal(); - assert!(matches!(next_goal, None)); - assert!(matches!(goals.current(), None)); + assert!(next_goal.is_none()); + assert!(goals.current().is_none()); } #[test] From 31a660cf74245a8469dd2d55e13fbdf51c0450d9 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 4 Apr 2024 10:21:35 +0800 Subject: [PATCH 47/48] Fix typo --- src/scheduler/work_bucket.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scheduler/work_bucket.rs b/src/scheduler/work_bucket.rs index 4386832b76..b2923b565a 100644 --- a/src/scheduler/work_bucket.rs +++ b/src/scheduler/work_bucket.rs @@ -139,7 +139,7 @@ impl WorkBucket { self.notify_one_worker(); } - /// Add a work packet ot this bucket, but do not notify any workers. + /// Add a work packet to this bucket, but do not notify any workers. /// This is useful when the current thread is holding the mutex of `WorkerMonitor` which is /// used for notifying workers. This usually happens if the current thread is the last worker /// parked. From 49c979e18dbc6ac225e6f78f199867c131a3bcc0 Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 4 Apr 2024 10:26:36 +0800 Subject: [PATCH 48/48] Add hyperlink --- src/scheduler/work_bucket.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scheduler/work_bucket.rs b/src/scheduler/work_bucket.rs index b2923b565a..ab55093bad 100644 --- a/src/scheduler/work_bucket.rs +++ b/src/scheduler/work_bucket.rs @@ -147,7 +147,7 @@ impl WorkBucket { self.queue.push(Box::new(work)); } - /// Like `add_no_notify`, but the work is boxed. + /// Like [`WorkBucket::add_no_notify`], but the work is boxed. pub(crate) fn add_boxed_no_notify(&self, work: Box>) { self.queue.push(work); }
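
Taken together, the series leaves the embedder-facing fork protocol looking
roughly like the sketch below. This is a hedged illustration based on the doc
comments in patches 34 and 39, not code from the series: `fork_vm` is a
made-up helper, and error handling is elided.

    use mmtk::util::VMThread;
    use mmtk::vm::VMBinding;
    use mmtk::MMTK;

    fn fork_vm<VM: VMBinding>(mmtk: &'static MMTK<VM>, tls: VMThread) -> libc::pid_t {
        // Ask all GC workers to save their contexts and exit.
        mmtk.prepare_to_fork();
        let pid = unsafe { libc::fork() };
        // Whichever process continues to use MMTk must respawn the GC worker
        // threads before allocating into the MMTk heap again. (This sketch
        // assumes both processes keep running the VM; a child that
        // immediately calls `exec` would skip the fork preparation entirely,
        // as patch 34 notes.)
        mmtk.after_fork(tls);
        pid
    }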