diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md index c741cbcde..665364b96 100644 --- a/.github/copilot-instructions.md +++ b/.github/copilot-instructions.md @@ -63,8 +63,6 @@ Make sure that you keep commits small and focused. Each commit should represent Make sure that you arrange your commits in a logical order. You can use `git rebase -i` to do this. If you update your PR branch with new commits, make sure to rebase your branch on top of the main branch. This will help keep the commit history clean and make it easier to review your changes. Make sure that you do not have any merge commits in your PR. -If when running tests you receive an Error like this "Error("No Hypervisor was found for Sandbox, Hypervisor Handler Message Receive Timedout")" then the real error is the string inside the brackets before the comma, so in this example -the error is "No Hypervisor was found for Sandbox". If you see the error "No Hypervisor was found for Sandbox" then you if you are running on Linux you should check to see if there is a device called "kvm" or "mshv" in `/dev` and if it is present you should also check to see if you have rw access to it. You should ensure that you output the results of these checks for diagnostic purposes. This repository requires commits to be signed you should ensure that any commits that you create are done with `commit.gpgsign=true`. Also commits should also be signed with the `--signoff` option to comply with the DCO requirement. 
diff --git a/Justfile b/Justfile index eae0f1368..78cc7ab5f 100644 --- a/Justfile +++ b/Justfile @@ -83,14 +83,12 @@ test-unit target=default-target features="": test-isolated target=default-target features="": cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::uninitialized::tests::test_trace_trace --exact --ignored cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::uninitialized::tests::test_log_trace --exact --ignored - cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- hypervisor::hypervisor_handler::tests::create_1000_sandboxes --exact --ignored + cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::initialized_multi_use::tests::create_1000_sandboxes --exact --ignored cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- sandbox::outb::tests::test_log_outb_log --exact --ignored cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p 
hyperlight-host --lib -- mem::shared_mem::tests::test_drop --exact --ignored cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --test integration_test -- log_message --exact --ignored @# metrics tests - cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F " + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- metrics::tests::test_metrics_are_emitted --exact --ignored - cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- metrics::tests::test_metrics_are_emitted --exact --ignored - + cargo test {{ if features =="" {''} else if features=="no-default-features" {"--no-default-features" } else {"--no-default-features -F function_call_metrics," + features } }} --profile={{ if target == "debug" { "dev" } else { target } }} -p hyperlight-host --lib -- metrics::tests::test_metrics_are_emitted --exact # runs integration tests. 
Guest can either be "rust" or "c" test-integration guest target=default-target features="": @# run execute_on_heap test with feature "executable_heap" on and off diff --git a/docs/how-to-debug-a-hyperlight-guest.md b/docs/how-to-debug-a-hyperlight-guest.md index abf571062..8e5dd7d55 100644 --- a/docs/how-to-debug-a-hyperlight-guest.md +++ b/docs/how-to-debug-a-hyperlight-guest.md @@ -142,15 +142,14 @@ To replicate the above behavior using VSCode follow the below steps: ## How it works The gdb feature is designed to work like a Request - Response protocol between -a thread that accepts commands from a gdb client and the hypervisor handler over -a communication channel. +a thread that accepts commands from a gdb client and main thread of the sandbox. All the functionality is implemented on the hypervisor side so it has access to the shared memory and the vCPU. The gdb thread uses the `gdbstub` crate to handle the communication with the gdb client. When the gdb client requests one of the supported features mentioned above, a request -is sent over the communication channel to the hypervisor handler for the sandbox +is sent over the communication channel to the main thread for the sandbox to resolve. 
Below is a sequence diagram that shows the interaction between the entities @@ -161,7 +160,7 @@ involved in the gdb debugging of a Hyperlight guest running inside a **KVM** or │ Hyperlight Sandbox │ USER │ │ ┌────────────┐ │ ┌──────────────┐ ┌───────────────────────────┐ ┌────────┐ │ -│ gdb client │ │ │ gdb thread │ │ hypervisor handler thread │ │ vCPU │ │ +│ gdb client │ │ │ gdb thread │ │ main sandbox thread │ │ vCPU │ │ └────────────┘ │ └──────────────┘ └───────────────────────────┘ └────────┘ │ | │ | create_gdb_thread | | │ | │ |◄─────────────────────────────────────────┌─┐ vcpu stopped ┌─┐ │ diff --git a/src/hyperlight_host/benches/benchmarks.rs b/src/hyperlight_host/benches/benchmarks.rs index 560ef319d..ee8d90fc7 100644 --- a/src/hyperlight_host/benches/benchmarks.rs +++ b/src/hyperlight_host/benches/benchmarks.rs @@ -14,8 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. */ -use std::time::Duration; - use criterion::{criterion_group, criterion_main, Criterion}; use hyperlight_host::sandbox::{MultiUseSandbox, SandboxConfiguration, UninitializedSandbox}; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; @@ -68,7 +66,6 @@ fn guest_call_benchmark(c: &mut Criterion) { let mut config = SandboxConfiguration::default(); config.set_input_data_size(2 * SIZE + (1024 * 1024)); // 2 * SIZE + 1 MB, to allow 1MB for the rest of the serialized function call config.set_heap_size(SIZE as u64 * 15); - config.set_max_execution_time(Duration::from_secs(10)); let sandbox = UninitializedSandbox::new( GuestBinary::FilePath(simple_guest_as_string().unwrap()), diff --git a/src/hyperlight_host/examples/logging/main.rs b/src/hyperlight_host/examples/logging/main.rs index fd2eddd51..01b0fdba1 100644 --- a/src/hyperlight_host/examples/logging/main.rs +++ b/src/hyperlight_host/examples/logging/main.rs @@ -16,6 +16,8 @@ limitations under the License. 
#![allow(clippy::disallowed_macros)] extern crate hyperlight_host; +use std::sync::{Arc, Barrier}; + use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; @@ -82,15 +84,29 @@ fn main() -> Result<()> { let no_op = Noop::::default(); let mut multiuse_sandbox = usandbox.evolve(no_op)?; + let interrupt_handle = multiuse_sandbox.interrupt_handle(); + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + const NUM_CALLS: i32 = 5; + let thread = std::thread::spawn(move || { + for _ in 0..NUM_CALLS { + barrier2.wait(); + // Sleep for a short time to allow the guest function to run. + std::thread::sleep(std::time::Duration::from_millis(500)); + // Cancel the host function call. + interrupt_handle.kill(); + } + }); // Call a function that gets cancelled by the host function 5 times to generate some log entries. - for _ in 0..5 { + for _ in 0..NUM_CALLS { let mut ctx = multiuse_sandbox.new_call_context(); - + barrier.wait(); ctx.call::<()>("Spin", ()).unwrap_err(); multiuse_sandbox = ctx.finish().unwrap(); } + thread.join().unwrap(); Ok(()) } diff --git a/src/hyperlight_host/examples/metrics/main.rs b/src/hyperlight_host/examples/metrics/main.rs index 4fd9f2183..c7b214e1f 100644 --- a/src/hyperlight_host/examples/metrics/main.rs +++ b/src/hyperlight_host/examples/metrics/main.rs @@ -15,6 +15,7 @@ limitations under the License. 
*/ #![allow(clippy::disallowed_macros)] extern crate hyperlight_host; +use std::sync::{Arc, Barrier}; use std::thread::{spawn, JoinHandle}; use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; @@ -95,12 +96,27 @@ fn do_hyperlight_stuff() { let no_op = Noop::::default(); let mut multiuse_sandbox = usandbox.evolve(no_op).expect("Failed to evolve sandbox"); + let interrupt_handle = multiuse_sandbox.interrupt_handle(); + + const NUM_CALLS: i32 = 5; + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + + let thread = std::thread::spawn(move || { + for _ in 0..NUM_CALLS { + barrier2.wait(); + // Sleep for a short time to allow the guest function to run after the `wait`. + std::thread::sleep(std::time::Duration::from_millis(500)); + // Cancel the host function call. + interrupt_handle.kill(); + } + }); // Call a function that gets cancelled by the host function 5 times to generate some metrics. - for _ in 0..5 { + for _ in 0..NUM_CALLS { let mut ctx = multiuse_sandbox.new_call_context(); - + barrier.wait(); ctx.call::<()>("Spin", ()).unwrap_err(); multiuse_sandbox = ctx.finish().unwrap(); } @@ -109,6 +125,7 @@ fn do_hyperlight_stuff() { let result = join_handle.join(); assert!(result.is_ok()); } + thread.join().unwrap(); } fn fn_writer(_msg: String) -> Result { diff --git a/src/hyperlight_host/examples/tracing-otlp/main.rs b/src/hyperlight_host/examples/tracing-otlp/main.rs index 55dacd98e..152a47d3b 100644 --- a/src/hyperlight_host/examples/tracing-otlp/main.rs +++ b/src/hyperlight_host/examples/tracing-otlp/main.rs @@ -16,7 +16,6 @@ limitations under the License. 
#![allow(clippy::disallowed_macros)] //use opentelemetry_sdk::resource::ResourceBuilder; use opentelemetry_sdk::trace::SdkTracerProvider; -use rand::Rng; use tracing::{span, Level}; use tracing_opentelemetry::OpenTelemetryLayer; use tracing_subscriber::layer::SubscriberExt; @@ -24,8 +23,8 @@ use tracing_subscriber::util::SubscriberInitExt; extern crate hyperlight_host; use std::error::Error; use std::io::stdin; -use std::sync::{Arc, Mutex}; -use std::thread::{self, spawn, JoinHandle}; +use std::sync::{Arc, Barrier, Mutex}; +use std::thread::{spawn, JoinHandle}; use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; @@ -157,8 +156,23 @@ fn run_example(wait_input: bool) -> HyperlightResult<()> { } // Call a function that gets cancelled by the host function 5 times to generate some log entries. - - for i in 0..5 { + const NUM_CALLS: i32 = 5; + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + + let interrupt_handle = multiuse_sandbox.interrupt_handle(); + + let thread = std::thread::spawn(move || { + for _ in 0..NUM_CALLS { + barrier2.wait(); + // Sleep for a short time to allow the guest function to run. + std::thread::sleep(std::time::Duration::from_millis(500)); + // Cancel the host function call. + interrupt_handle.kill(); + } + }); + + for i in 0..NUM_CALLS { let id = Uuid::new_v4(); // Construct a new span named "hyperlight tracing call cancellation example thread" with INFO level. 
let span = span!( @@ -169,15 +183,11 @@ fn run_example(wait_input: bool) -> HyperlightResult<()> { ); let _entered = span.enter(); let mut ctx = multiuse_sandbox.new_call_context(); - + barrier.wait(); ctx.call::<()>("Spin", ()).unwrap_err(); multiuse_sandbox = ctx.finish().unwrap(); } - let sleep_for = { - let mut rng = rand::rng(); - rng.random_range(500..3000) - }; - thread::sleep(std::time::Duration::from_millis(sleep_for)); + thread.join().expect("Thread panicked"); } Ok(()) }); diff --git a/src/hyperlight_host/examples/tracing/main.rs b/src/hyperlight_host/examples/tracing/main.rs index 67f16cffa..0e1f42cfa 100644 --- a/src/hyperlight_host/examples/tracing/main.rs +++ b/src/hyperlight_host/examples/tracing/main.rs @@ -16,6 +16,7 @@ limitations under the License. #![allow(clippy::disallowed_macros)] use tracing::{span, Level}; extern crate hyperlight_host; +use std::sync::{Arc, Barrier}; use std::thread::{spawn, JoinHandle}; use hyperlight_host::sandbox::uninitialized::UninitializedSandbox; @@ -110,10 +111,24 @@ fn run_example() -> Result<()> { let no_op = Noop::::default(); let mut multiuse_sandbox = usandbox.evolve(no_op)?; + let interrupt_handle = multiuse_sandbox.interrupt_handle(); // Call a function that gets cancelled by the host function 5 times to generate some log entries. - - for i in 0..5 { + const NUM_CALLS: i32 = 5; + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + + let thread = std::thread::spawn(move || { + for _ in 0..NUM_CALLS { + barrier2.wait(); + // Sleep for a short time to allow the guest function to run. + std::thread::sleep(std::time::Duration::from_millis(500)); + // Cancel the host function call. + interrupt_handle.kill(); + } + }); + + for i in 0..NUM_CALLS { let id = Uuid::new_v4(); // Construct a new span named "hyperlight tracing call cancellation example thread" with INFO level. 
let span = span!( @@ -124,7 +139,7 @@ fn run_example() -> Result<()> { ); let _entered = span.enter(); let mut ctx = multiuse_sandbox.new_call_context(); - + barrier.wait(); ctx.call::<()>("Spin", ()).unwrap_err(); multiuse_sandbox = ctx.finish().unwrap(); } @@ -133,6 +148,7 @@ fn run_example() -> Result<()> { let result = join_handle.join(); assert!(result.is_ok()); } + thread.join().unwrap(); Ok(()) } diff --git a/src/hyperlight_host/src/error.rs b/src/hyperlight_host/src/error.rs index 8a143daac..df8ee8a37 100644 --- a/src/hyperlight_host/src/error.rs +++ b/src/hyperlight_host/src/error.rs @@ -122,22 +122,6 @@ pub enum HyperlightError { #[error("HostFunction {0} was not found")] HostFunctionNotFound(String), - /// An attempt to communicate with or from the Hypervisor Handler thread failed - /// (i.e., usually a failure call to `.send()` or `.recv()` on a message passing - /// channel) - #[error("Communication failure with the Hypervisor Handler thread")] - HypervisorHandlerCommunicationFailure(), - - /// An attempt to cancel a Hypervisor Handler execution failed. - /// See `terminate_hypervisor_handler_execution_and_reinitialise` - /// for more details. - #[error("Hypervisor Handler execution cancel attempt on a finished execution")] - HypervisorHandlerExecutionCancelAttemptOnFinishedExecution(), - - /// A Receive for a Hypervisor Handler Message Timedout - #[error("Hypervisor Handler Message Receive Timedout")] - HypervisorHandlerMessageReceiveTimedout(), - /// Reading Writing or Seeking data failed. #[error("Reading Writing or Seeking data failed {0:?}")] IOError(#[from] std::io::Error), diff --git a/src/hyperlight_host/src/func/call_ctx.rs b/src/hyperlight_host/src/func/call_ctx.rs index baf47730f..eba21d46a 100644 --- a/src/hyperlight_host/src/func/call_ctx.rs +++ b/src/hyperlight_host/src/func/call_ctx.rs @@ -16,7 +16,6 @@ limitations under the License. 
use tracing::{instrument, Span}; -use super::guest_dispatch::call_function_on_guest; use super::{ParameterTuple, SupportedReturnType}; use crate::{MultiUseSandbox, Result}; /// A context for calling guest functions. @@ -69,8 +68,11 @@ impl MultiUseGuestCallContext { // !Send (and !Sync), we also don't need to worry about // synchronization - let ret = - call_function_on_guest(&mut self.sbox, func_name, Output::TYPE, args.into_value()); + let ret = self.sbox.call_guest_function_by_name_no_reset( + func_name, + Output::TYPE, + args.into_value(), + ); Output::from_value(ret?) } diff --git a/src/hyperlight_host/src/func/guest_dispatch.rs b/src/hyperlight_host/src/func/guest_dispatch.rs deleted file mode 100644 index 8531a4768..000000000 --- a/src/hyperlight_host/src/func/guest_dispatch.rs +++ /dev/null @@ -1,450 +0,0 @@ -/* -Copyright 2025 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType}; -use hyperlight_common::flatbuffer_wrappers::function_types::{ - ParameterValue, ReturnType, ReturnValue, -}; -use tracing::{instrument, Span}; - -use super::guest_err::check_for_guest_error; -use crate::hypervisor::hypervisor_handler::HypervisorHandlerAction; -use crate::sandbox::WrapperGetter; -use crate::HyperlightError::GuestExecutionHungOnHostFunctionCall; -use crate::{HyperlightError, Result}; - -/// Call a guest function by name, using the given `wrapper_getter`. 
-#[instrument( - err(Debug), - skip(wrapper_getter, args), - parent = Span::current(), - level = "Trace" -)] -pub(crate) fn call_function_on_guest( - wrapper_getter: &mut impl WrapperGetter, - function_name: &str, - ret_type: ReturnType, - args: Vec, -) -> Result { - let mut timedout = false; - - let fc = FunctionCall::new( - function_name.to_string(), - Some(args), - FunctionCallType::Guest, - ret_type, - ); - - let buffer: Vec = fc - .try_into() - .map_err(|_| HyperlightError::Error("Failed to serialize FunctionCall".to_string()))?; - - { - let mem_mgr = wrapper_getter.get_mgr_wrapper_mut(); - mem_mgr.as_mut().write_guest_function_call(&buffer)?; - } - - let mut hv_handler = wrapper_getter.get_hv_handler().clone(); - match hv_handler.execute_hypervisor_handler_action( - HypervisorHandlerAction::DispatchCallFromHost(function_name.to_string()), - ) { - Ok(()) => {} - Err(e) => match e { - HyperlightError::HypervisorHandlerMessageReceiveTimedout() => { - timedout = true; - match hv_handler.terminate_hypervisor_handler_execution_and_reinitialise( - wrapper_getter.get_mgr_wrapper_mut().unwrap_mgr_mut(), - )? { - HyperlightError::HypervisorHandlerExecutionCancelAttemptOnFinishedExecution() => - {} - // ^^^ do nothing, we just want to actually get the Flatbuffer return value - // from shared memory in this case - e => return Err(e), - } - } - e => return Err(e), - }, - }; - - let mem_mgr = wrapper_getter.get_mgr_wrapper_mut(); - mem_mgr.check_stack_guard()?; // <- wrapper around mem_mgr `check_for_stack_guard` - check_for_guest_error(mem_mgr)?; - - let ret = mem_mgr - .as_mut() - .get_guest_function_call_result() - .map_err(|e| { - if timedout { - // if we timed-out, but still got here - // that means we had actually gotten stuck - // on the execution of a host function, and; - // hence, couldn't cancel guest execution. - // This particular check is needed now, because - // unlike w/ the previous scoped thread usage, - // we can't check if the thread completed or not. 
- log::error!("Guest execution hung on host function call"); - GuestExecutionHungOnHostFunctionCall() - } else { - e - } - })?; - - Ok(ret) -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; - use std::thread; - - use hyperlight_testing::{callback_guest_as_string, simple_guest_as_string}; - - use crate::func::call_ctx::MultiUseGuestCallContext; - use crate::sandbox::is_hypervisor_present; - use crate::sandbox::uninitialized::GuestBinary; - use crate::sandbox_state::sandbox::EvolvableSandbox; - use crate::sandbox_state::transition::Noop; - use crate::{new_error, HyperlightError, MultiUseSandbox, Result, UninitializedSandbox}; - - // simple function - fn test_function0(_: MultiUseGuestCallContext) -> Result { - Ok(42) - } - - struct GuestStruct; - - // function that return type unsupported by the host - fn test_function1(_: MultiUseGuestCallContext) -> Result { - Ok(GuestStruct) - } - - // function that takes a parameter - fn test_function2(_: MultiUseGuestCallContext, param: i32) -> Result { - Ok(param) - } - - #[test] - // TODO: Investigate why this test fails with an incorrect error when run alongside other tests - #[ignore] - #[cfg(target_os = "linux")] - fn test_violate_seccomp_filters() -> Result<()> { - if !is_hypervisor_present() { - panic!("Panic on create_multi_use_sandbox because no hypervisor is present"); - } - - fn make_get_pid_syscall() -> Result { - let pid = unsafe { libc::syscall(libc::SYS_getpid) }; - Ok(pid as u64) - } - - // First, run to make sure it fails. 
- { - let mut usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - usbox.register("MakeGetpidSyscall", make_get_pid_syscall)?; - - let mut sbox: MultiUseSandbox = usbox.evolve(Noop::default())?; - - let res: Result = sbox.call_guest_function_by_name("ViolateSeccompFilters", ()); - - #[cfg(feature = "seccomp")] - match res { - Ok(_) => panic!("Expected to fail due to seccomp violation"), - Err(e) => match e { - HyperlightError::DisallowedSyscall => {} - _ => panic!("Expected DisallowedSyscall error: {}", e), - }, - } - - #[cfg(not(feature = "seccomp"))] - match res { - Ok(_) => (), - Err(e) => panic!("Expected to succeed without seccomp: {}", e), - } - } - - // Second, run with allowing `SYS_getpid` - #[cfg(feature = "seccomp")] - { - let mut usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - usbox.register_with_extra_allowed_syscalls( - "MakeGetpidSyscall", - make_get_pid_syscall, - vec![libc::SYS_getpid], - )?; - // ^^^ note, we are allowing SYS_getpid - - let mut sbox: MultiUseSandbox = usbox.evolve(Noop::default())?; - - let res: Result = sbox.call_guest_function_by_name("ViolateSeccompFilters", ()); - - match res { - Ok(_) => {} - Err(e) => panic!("Expected to succeed due to seccomp violation: {}", e), - } - } - - Ok(()) - } - - #[test] - fn test_execute_in_host() { - let uninitialized_sandbox = || { - UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap() - }; - - // test_function0 - { - let usbox = uninitialized_sandbox(); - let sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let result = test_function0(sandbox.new_call_context()); - assert_eq!(result.unwrap(), 42); - } - - // test_function1 - { - let usbox = uninitialized_sandbox(); - let 
sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let result = test_function1(sandbox.new_call_context()); - assert!(result.is_ok()); - } - - // test_function2 - { - let usbox = uninitialized_sandbox(); - let sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let result = test_function2(sandbox.new_call_context(), 42); - assert_eq!(result.unwrap(), 42); - } - - // test concurrent calls with a local closure that returns current count - { - let count = Arc::new(Mutex::new(0)); - let order = Arc::new(Mutex::new(vec![])); - - let mut handles = vec![]; - - for _ in 0..10 { - let usbox = uninitialized_sandbox(); - let sandbox: MultiUseSandbox = usbox - .evolve(Noop::default()) - .expect("Failed to initialize sandbox"); - let _ctx = sandbox.new_call_context(); - let count = Arc::clone(&count); - let order = Arc::clone(&order); - let handle = thread::spawn(move || { - // we're not actually using the context, but we're calling - // it here to test the mutual exclusion - let mut num = count - .try_lock() - .map_err(|_| new_error!("Error locking")) - .unwrap(); - *num += 1; - order - .try_lock() - .map_err(|_| new_error!("Error locking")) - .unwrap() - .push(*num); - }); - handles.push(handle); - } - - for handle in handles { - handle.join().unwrap(); - } - - // Check if the order of operations is sequential - let order = order - .try_lock() - .map_err(|_| new_error!("Error locking")) - .unwrap(); - for i in 0..10 { - assert_eq!(order[i], i + 1); - } - } - - // TODO: Add tests to ensure State has been reset. 
- } - - #[track_caller] - fn guest_bin() -> GuestBinary { - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")) - } - - #[track_caller] - fn test_call_guest_function_by_name(u_sbox: UninitializedSandbox) { - let mu_sbox: MultiUseSandbox = u_sbox.evolve(Noop::default()).unwrap(); - - let msg = "Hello, World!!\n".to_string(); - let len = msg.len() as i32; - let mut ctx = mu_sbox.new_call_context(); - let result: i32 = ctx.call("PrintOutput", msg).unwrap(); - - assert_eq!(result, len); - } - - fn call_guest_function_by_name_hv() { - // in-hypervisor mode - let u_sbox = UninitializedSandbox::new( - guest_bin(), - // for now, we're using defaults. In the future, we should get - // variability below - None, - // by default, the below represents in-hypervisor mode - ) - .unwrap(); - test_call_guest_function_by_name(u_sbox); - } - - #[test] - fn test_call_guest_function_by_name_hv() { - call_guest_function_by_name_hv(); - } - - fn terminate_vcpu_after_1000ms() -> Result<()> { - // This test relies upon a Hypervisor being present so for now - // we will skip it if there isn't one. - if !is_hypervisor_present() { - println!("Skipping terminate_vcpu_after_1000ms because no hypervisor is present"); - return Ok(()); - } - let usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - )?; - let sandbox: MultiUseSandbox = usbox.evolve(Noop::default())?; - let mut ctx = sandbox.new_call_context(); - let result: Result<()> = ctx.call("Spin", ()); - - assert!(result.is_err()); - match result.unwrap_err() { - HyperlightError::ExecutionCanceledByHost() => {} - e => panic!( - "Expected HyperlightError::ExecutionCanceledByHost() but got {:?}", - e - ), - } - Ok(()) - } - - // Test that we can terminate a VCPU that has been running the VCPU for too long. 
- #[test] - fn test_terminate_vcpu_spinning_cpu() -> Result<()> { - terminate_vcpu_after_1000ms()?; - Ok(()) - } - - // Test that we can terminate a VCPU that has been running the VCPU for too long and then call a guest function on the same host thread. - #[test] - fn test_terminate_vcpu_and_then_call_guest_function_on_the_same_host_thread() -> Result<()> { - terminate_vcpu_after_1000ms()?; - call_guest_function_by_name_hv(); - Ok(()) - } - - // This test is to capture the case where the guest execution is running a host function when cancelled and that host function - // is never going to return. - // The host function that is called will end after 5 seconds, but by this time the cancellation will have given up - // (using default timeout settings) , so this tests looks for the error "Failed to cancel guest execution". - - #[test] - fn test_terminate_vcpu_calling_host_spinning_cpu() { - // This test relies upon a Hypervisor being present so for now - // we will skip it if there isn't one. 
- if !is_hypervisor_present() { - println!("Skipping test_call_guest_function_by_name because no hypervisor is present"); - return; - } - let mut usbox = UninitializedSandbox::new( - GuestBinary::FilePath(callback_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - // Make this host call run for 5 seconds - - fn spin() -> Result<()> { - thread::sleep(std::time::Duration::from_secs(5)); - Ok(()) - } - - #[cfg(any(target_os = "windows", not(feature = "seccomp")))] - usbox.register("Spin", spin).unwrap(); - - #[cfg(all(target_os = "linux", feature = "seccomp"))] - usbox - .register_with_extra_allowed_syscalls("Spin", spin, vec![libc::SYS_clock_nanosleep]) - .unwrap(); - - let sandbox: MultiUseSandbox = usbox.evolve(Noop::default()).unwrap(); - let mut ctx = sandbox.new_call_context(); - let result: Result<()> = ctx.call("CallHostSpin", ()); - - assert!(result.is_err()); - match result.unwrap_err() { - HyperlightError::GuestExecutionHungOnHostFunctionCall() => {} - e => panic!( - "Expected HyperlightError::GuestExecutionHungOnHostFunctionCall but got {:?}", - e - ), - } - } - - #[test] - fn test_trigger_exception_on_guest() { - let usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - None, - ) - .unwrap(); - - let mut multi_use_sandbox: MultiUseSandbox = usbox.evolve(Noop::default()).unwrap(); - - let res: Result<()> = multi_use_sandbox.call_guest_function_by_name("TriggerException", ()); - - assert!(res.is_err()); - - match res.unwrap_err() { - HyperlightError::GuestAborted(_, msg) => { - // msg should indicate we got an invalid opcode exception - assert!(msg.contains("InvalidOpcode")); - } - e => panic!( - "Expected HyperlightError::GuestExecutionError but got {:?}", - e - ), - } - } -} diff --git a/src/hyperlight_host/src/func/mod.rs b/src/hyperlight_host/src/func/mod.rs index 65dc2283f..a70c85fae 100644 --- a/src/hyperlight_host/src/func/mod.rs +++ 
b/src/hyperlight_host/src/func/mod.rs @@ -18,8 +18,6 @@ limitations under the License. /// functions on the same Hyperlight sandbox instance, all from within the /// same state and mutual exclusion context. pub mod call_ctx; -/// Functionality to dispatch a call from the host to the guest -pub(crate) mod guest_dispatch; /// Functionality to check for errors after a guest call pub(crate) mod guest_err; /// Definitions and functionality to enable guest-to-host function calling, diff --git a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs index 0da2a9434..104f75cab 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_linux.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_linux.rs @@ -25,6 +25,8 @@ extern crate mshv_bindings3 as mshv_bindings; extern crate mshv_ioctls3 as mshv_ioctls; use std::fmt::{Debug, Formatter}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::Arc; use log::{error, LevelFilter}; #[cfg(mshv2)] @@ -46,7 +48,7 @@ use mshv_bindings::{ hv_partition_property_code_HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES, hv_partition_synthetic_processor_features, }; -use mshv_ioctls::{Mshv, VcpuFd, VmFd}; +use mshv_ioctls::{Mshv, MshvError, VcpuFd, VmFd}; use tracing::{instrument, Span}; use super::fpu::{FP_CONTROL_WORD_DEFAULT, FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT}; @@ -56,13 +58,14 @@ use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, MshvDebu use super::handlers::DbgMemAccessHandlerWrapper; use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; use super::{ - Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, - CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, + Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, + CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, + EFER_SCE, }; -use 
crate::hypervisor::hypervisor_handler::HypervisorHandler; use crate::hypervisor::HyperlightExit; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::ptr::{GuestPtr, RawPtr}; +use crate::sandbox::SandboxConfiguration; #[cfg(gdb)] use crate::HyperlightError; use crate::{log_then_return, new_error, Result}; @@ -286,13 +289,14 @@ pub(crate) fn is_hypervisor_present() -> bool { /// A Hypervisor driver for HyperV-on-Linux. This hypervisor is often /// called the Microsoft Hypervisor (MSHV) -pub(super) struct HypervLinuxDriver { +pub(crate) struct HypervLinuxDriver { _mshv: Mshv, vm_fd: VmFd, vcpu_fd: VcpuFd, entrypoint: u64, mem_regions: Vec, orig_rsp: GuestPtr, + interrupt_handle: Arc, #[cfg(gdb)] debug: Option, @@ -310,11 +314,12 @@ impl HypervLinuxDriver { /// `apply_registers` method to do that, or more likely call /// `initialise` to do it for you. #[instrument(skip_all, parent = Span::current(), level = "Trace")] - pub(super) fn new( + pub(crate) fn new( mem_regions: Vec, entrypoint_ptr: GuestPtr, rsp_ptr: GuestPtr, pml4_ptr: GuestPtr, + config: &SandboxConfiguration, #[cfg(gdb)] gdb_conn: Option>, ) -> Result { let mshv = Mshv::new()?; @@ -390,6 +395,14 @@ impl HypervLinuxDriver { mem_regions, entrypoint: entrypoint_ptr.absolute()?, orig_rsp: rsp_ptr, + interrupt_handle: Arc::new(LinuxInterruptHandle { + running: AtomicU64::new(0), + cancel_requested: AtomicBool::new(false), + tid: AtomicU64::new(unsafe { libc::pthread_self() }), + retry_delay: config.get_interrupt_retry_delay(), + sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), + dropped: AtomicBool::new(false), + }), #[cfg(gdb)] debug, @@ -461,7 +474,6 @@ impl Hypervisor for HypervLinuxDriver { page_size: u32, outb_hdl: OutBHandlerWrapper, mem_access_hdl: MemAccessHandlerWrapper, - hv_handler: Option, max_guest_log_level: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()> { @@ -487,7 +499,6 @@ impl Hypervisor for HypervLinuxDriver { 
VirtualCPU::run( self.as_mut_hypervisor(), - hv_handler, outb_hdl, mem_access_hdl, #[cfg(gdb)] @@ -503,7 +514,6 @@ impl Hypervisor for HypervLinuxDriver { dispatch_func_addr: RawPtr, outb_handle_fn: OutBHandlerWrapper, mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()> { // Reset general purpose registers, then set RIP and RSP @@ -527,7 +537,6 @@ impl Hypervisor for HypervLinuxDriver { // run VirtualCPU::run( self.as_mut_hypervisor(), - hv_handler, outb_handle_fn, mem_access_fn, #[cfg(gdb)] @@ -577,15 +586,62 @@ impl Hypervisor for HypervLinuxDriver { #[cfg(gdb)] const EXCEPTION_INTERCEPT: hv_message_type = hv_message_type_HVMSG_X64_EXCEPTION_INTERCEPT; - #[cfg(mshv2)] - let run_result = { - let hv_message: hv_message = Default::default(); - &self.vcpu_fd.run(hv_message) + self.interrupt_handle + .tid + .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + self.interrupt_handle + .set_running_and_increment_generation() + .map_err(|e| { + new_error!( + "Error setting running state and incrementing generation: {}", + e + ) + })?; + // Don't run the vcpu if `cancel_requested` is true + // + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + let exit_reason = if self + .interrupt_handle + .cancel_requested + .load(Ordering::Relaxed) + { + Err(MshvError::Errno(vmm_sys_util::errno::Error::new( + libc::EINTR, + ))) + } else { + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then the vcpu will run, but we will keep sending signals to this thread + // to interrupt it until `running` is set to false. 
The `vcpu_fd::run()` call will + return either normally with an exit reason, or from being "kicked" by our signal handler, with an EINTR error, + both of which are fine. + #[cfg(mshv2)] + { + let hv_message: hv_message = Default::default(); + self.vcpu_fd.run(hv_message) + } + #[cfg(mshv3)] + self.vcpu_fd.run() }; - #[cfg(mshv3)] - let run_result = &self.vcpu_fd.run(); - - let result = match run_result { + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then signals will be sent to this thread until `running` is set to false. + // This is fine since the signal handler is a no-op. + let cancel_requested = self + .interrupt_handle + .cancel_requested + .swap(false, Ordering::Relaxed); + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. + // Additionally signals will be sent to this thread until `running` is set to false. + // This is fine since the signal handler is a no-op. + self.interrupt_handle.clear_running_bit(); + // At this point, `running` is false so no more signals will be sent to this thread, + // but we may still receive async signals that were sent before this point. + // To prevent those signals from interrupting subsequent calls to `run()`, + // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). 
+ let result = match exit_reason { Ok(m) => match m.header.message_type { HALT_MESSAGE => { crate::debug!("mshv - Halt Details : {:#?}", &self); @@ -662,7 +718,15 @@ impl Hypervisor for HypervLinuxDriver { }, Err(e) => match e.errno() { // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled - libc::EINTR => HyperlightExit::Cancelled(), + libc::EINTR => { + // If cancellation was not requested for this specific vm, the vcpu was interrupted because of stale signal + // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + if cancel_requested { + HyperlightExit::Cancelled() + } else { + HyperlightExit::Retry() + } + } libc::EAGAIN => HyperlightExit::Retry(), _ => { crate::debug!("mshv Error - Details: Error: {} \n {:#?}", e, &self); @@ -678,6 +742,10 @@ impl Hypervisor for HypervLinuxDriver { self as &mut dyn Hypervisor } + fn interrupt_handle(&self) -> Arc { + self.interrupt_handle.clone() + } + #[cfg(crashdump)] fn get_memory_regions(&self) -> &[MemoryRegion] { &self.mem_regions @@ -732,6 +800,7 @@ impl Hypervisor for HypervLinuxDriver { impl Drop for HypervLinuxDriver { #[instrument(skip_all, parent = Span::current(), level = "Trace")] fn drop(&mut self) { + self.interrupt_handle.dropped.store(true, Ordering::Relaxed); for region in &self.mem_regions { let mshv_region: mshv_user_mem_region = region.to_owned().into(); match self.vm_fd.unmap_user_memory(mshv_region) { @@ -793,11 +862,14 @@ mod tests { MemoryRegionFlags::READ | MemoryRegionFlags::WRITE | MemoryRegionFlags::EXECUTE, crate::mem::memory_region::MemoryRegionType::Code, ); + let config: SandboxConfiguration = Default::default(); + super::HypervLinuxDriver::new( regions.build(), entrypoint_ptr, rsp_ptr, pml4_ptr, + &config, #[cfg(gdb)] None, ) diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 084fda388..f692ec179 100644 --- 
a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -18,14 +18,17 @@ use core::ffi::c_void; use std::fmt; use std::fmt::{Debug, Formatter}; use std::string::String; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; use hyperlight_common::mem::PAGE_SIZE_USIZE; use log::LevelFilter; use tracing::{instrument, Span}; use windows::Win32::System::Hypervisor::{ - WHvX64RegisterCr0, WHvX64RegisterCr3, WHvX64RegisterCr4, WHvX64RegisterCs, WHvX64RegisterEfer, - WHV_MEMORY_ACCESS_TYPE, WHV_PARTITION_HANDLE, WHV_REGISTER_VALUE, WHV_RUN_VP_EXIT_CONTEXT, - WHV_RUN_VP_EXIT_REASON, WHV_X64_SEGMENT_REGISTER, WHV_X64_SEGMENT_REGISTER_0, + WHvCancelRunVirtualProcessor, WHvX64RegisterCr0, WHvX64RegisterCr3, WHvX64RegisterCr4, + WHvX64RegisterCs, WHvX64RegisterEfer, WHV_MEMORY_ACCESS_TYPE, WHV_PARTITION_HANDLE, + WHV_REGISTER_VALUE, WHV_RUN_VP_EXIT_CONTEXT, WHV_RUN_VP_EXIT_REASON, WHV_X64_SEGMENT_REGISTER, + WHV_X64_SEGMENT_REGISTER_0, }; use super::fpu::{FP_TAG_WORD_DEFAULT, MXCSR_DEFAULT}; @@ -37,11 +40,11 @@ use super::surrogate_process_manager::*; use super::windows_hypervisor_platform::{VMPartition, VMProcessor}; use super::wrappers::{HandleWrapper, WHvFPURegisters}; use super::{ - HyperlightExit, Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, - CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, + HyperlightExit, Hypervisor, InterruptHandle, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, + CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, + EFER_SCE, }; use crate::hypervisor::fpu::FP_CONTROL_WORD_DEFAULT; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; use crate::hypervisor::wrappers::WHvGeneralRegisters; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::ptr::{GuestPtr, RawPtr}; @@ -56,6 +59,7 @@ pub(crate) struct HypervWindowsDriver { entrypoint: u64, 
orig_rsp: GuestPtr, mem_regions: Vec, + interrupt_handle: Arc, } /* This does not automatically impl Send/Sync because the host * address of the shared memory region is a raw pointer, which are @@ -90,6 +94,7 @@ impl HypervWindowsDriver { let mut proc = VMProcessor::new(partition)?; Self::setup_initial_sregs(&mut proc, pml4_address)?; + let partition_handle = proc.get_partition_hdl(); // subtract 2 pages for the guard pages, since when we copy memory to and from surrogate process, // we don't want to copy the guard pages themselves (that would cause access violation) @@ -102,6 +107,12 @@ impl HypervWindowsDriver { entrypoint, orig_rsp: GuestPtr::try_from(RawPtr::from(rsp))?, mem_regions, + interrupt_handle: Arc::new(WindowsInterruptHandle { + running: AtomicBool::new(false), + cancel_requested: AtomicBool::new(false), + partition_handle, + dropped: AtomicBool::new(false), + }), }) } @@ -151,11 +162,6 @@ impl HypervWindowsDriver { error.push_str(&format!("Registers: \n{:#?}", self.processor.get_regs()?)); Ok(error) } - - #[instrument(skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn get_partition_hdl(&self) -> WHV_PARTITION_HANDLE { - self.processor.get_partition_hdl() - } } impl Debug for HypervWindowsDriver { @@ -307,7 +313,6 @@ impl Hypervisor for HypervWindowsDriver { page_size: u32, outb_hdl: OutBHandlerWrapper, mem_access_hdl: MemAccessHandlerWrapper, - hv_handler: Option, max_guest_log_level: Option, #[cfg(gdb)] dbg_mem_access_hdl: DbgMemAccessHandlerWrapper, ) -> Result<()> { @@ -333,7 +338,6 @@ impl Hypervisor for HypervWindowsDriver { VirtualCPU::run( self.as_mut_hypervisor(), - hv_handler, outb_hdl, mem_access_hdl, #[cfg(gdb)] @@ -349,7 +353,6 @@ impl Hypervisor for HypervWindowsDriver { dispatch_func_addr: RawPtr, outb_hdl: OutBHandlerWrapper, mem_access_hdl: MemAccessHandlerWrapper, - hv_handler: Option, #[cfg(gdb)] dbg_mem_access_hdl: DbgMemAccessHandlerWrapper, ) -> Result<()> { // Reset general purpose registers, then set RIP and 
RSP @@ -371,7 +374,6 @@ impl Hypervisor for HypervWindowsDriver { VirtualCPU::run( self.as_mut_hypervisor(), - hv_handler, outb_hdl, mem_access_hdl, #[cfg(gdb)] @@ -407,7 +409,29 @@ impl Hypervisor for HypervWindowsDriver { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn run(&mut self) -> Result { - let exit_context: WHV_RUN_VP_EXIT_CONTEXT = self.processor.run()?; + self.interrupt_handle.running.store(true, Ordering::Relaxed); + + // Don't run the vcpu if `cancel_requested` is true + let exit_context = if self + .interrupt_handle + .cancel_requested + .load(Ordering::Relaxed) + { + WHV_RUN_VP_EXIT_CONTEXT { + ExitReason: WHV_RUN_VP_EXIT_REASON(8193i32), // WHvRunVpExitReasonCanceled + VpContext: Default::default(), + Anonymous: Default::default(), + Reserved: Default::default(), + } + } else { + self.processor.run()? + }; + self.interrupt_handle + .cancel_requested + .store(false, Ordering::Relaxed); + self.interrupt_handle + .running + .store(false, Ordering::Relaxed); let result = match exit_context.ExitReason { // WHvRunVpExitReasonX64IoPortAccess @@ -481,8 +505,8 @@ impl Hypervisor for HypervWindowsDriver { Ok(result) } - fn get_partition_handle(&self) -> WHV_PARTITION_HANDLE { - self.processor.get_partition_hdl() + fn interrupt_handle(&self) -> Arc { + self.interrupt_handle.clone() } #[instrument(skip_all, parent = Span::current(), level = "Trace")] @@ -496,28 +520,28 @@ impl Hypervisor for HypervWindowsDriver { } } -#[cfg(test)] -pub mod tests { - use std::sync::{Arc, Mutex}; +impl Drop for HypervWindowsDriver { + fn drop(&mut self) { + self.interrupt_handle.dropped.store(true, Ordering::Relaxed); + } +} - use serial_test::serial; +pub struct WindowsInterruptHandle { + // `WHvCancelRunVirtualProcessor()` will return Ok even if the vcpu is not running, which is the reason we need this flag. 
+ running: AtomicBool, + cancel_requested: AtomicBool, + partition_handle: WHV_PARTITION_HANDLE, + dropped: AtomicBool, +} - use crate::hypervisor::handlers::{MemAccessHandler, OutBHandler}; - use crate::hypervisor::tests::test_initialise; - use crate::Result; +impl InterruptHandle for WindowsInterruptHandle { + fn kill(&self) -> bool { + self.cancel_requested.store(true, Ordering::Relaxed); + self.running.load(Ordering::Relaxed) + && unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } + } - #[test] - #[serial] - fn test_init() { - let outb_handler = { - let func: Box Result<()> + Send> = - Box::new(|_, _| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(OutBHandler::from(func))) - }; - let mem_access_handler = { - let func: Box Result<()> + Send> = Box::new(|| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(MemAccessHandler::from(func))) - }; - test_initialise(outb_handler, mem_access_handler).unwrap(); + fn dropped(&self) -> bool { + self.dropped.load(Ordering::Relaxed) } } diff --git a/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs b/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs deleted file mode 100644 index cf43cf3ca..000000000 --- a/src/hyperlight_host/src/hypervisor/hypervisor_handler.rs +++ /dev/null @@ -1,1074 +0,0 @@ -/* -Copyright 2025 The Hyperlight Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -#[cfg(target_os = "windows")] -use core::ffi::c_void; -use std::ops::DerefMut; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Mutex}; -use std::thread; -use std::thread::{sleep, JoinHandle}; -use std::time::Duration; - -#[cfg(target_os = "linux")] -use crossbeam::atomic::AtomicCell; -use crossbeam_channel::{Receiver, Sender}; -#[cfg(target_os = "linux")] -use libc::{pthread_kill, pthread_self, ESRCH}; -use log::{error, info, LevelFilter}; -use tracing::{instrument, Span}; -#[cfg(target_os = "linux")] -use vmm_sys_util::signal::SIGRTMIN; -#[cfg(target_os = "windows")] -use windows::Win32::System::Hypervisor::{WHvCancelRunVirtualProcessor, WHV_PARTITION_HANDLE}; - -#[cfg(gdb)] -use super::gdb::create_gdb_thread; -#[cfg(gdb)] -use crate::hypervisor::handlers::DbgMemAccessHandlerWrapper; -use crate::hypervisor::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; -#[cfg(target_os = "windows")] -use crate::hypervisor::wrappers::HandleWrapper; -use crate::hypervisor::Hypervisor; -use crate::mem::layout::SandboxMemoryLayout; -use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::{GuestPtr, RawPtr}; -use crate::mem::ptr_offset::Offset; -use crate::mem::shared_mem::{GuestSharedMemory, HostSharedMemory, SharedMemory}; -#[cfg(gdb)] -use crate::sandbox::config::DebugInfo; -use crate::sandbox::hypervisor::{get_available_hypervisor, HypervisorType}; -#[cfg(target_os = "linux")] -use crate::signal_handlers::setup_signal_handlers; -use crate::HyperlightError::{ - GuestExecutionHungOnHostFunctionCall, - HypervisorHandlerExecutionCancelAttemptOnFinishedExecution, NoHypervisorFound, -}; -use crate::{log_then_return, new_error, HyperlightError, Result}; - -type HypervisorHandlerTx = Sender; -type HypervisorHandlerRx = Receiver; -type HandlerMsgTx = Sender; -type HandlerMsgRx = Receiver; - -#[derive(Clone)] -pub(crate) struct HypervisorHandler { - communication_channels: HvHandlerCommChannels, - configuration: HvHandlerConfig, - 
execution_variables: HvHandlerExecVars, -} - -impl HypervisorHandler { - pub(crate) fn set_running(&self, running: bool) { - self.execution_variables - .running - .store(running, Ordering::SeqCst); - } - - #[cfg(target_os = "linux")] - pub(crate) fn set_run_cancelled(&self, run_cancelled: bool) { - self.execution_variables.run_cancelled.store(run_cancelled); - } -} - -// Note: `join_handle` and `running` have to be `Arc` because we need -// this struct to be `Clone` to be able to pass it to the Hypervisor handler thread. -// -// `join_handle` also has to be `Mutex` because we need to be able to `take` it when we -// `try_join_hypervisor_handler_thread`. -#[derive(Clone)] -struct HvHandlerExecVars { - join_handle: Arc>>>>, - shm: Arc>>>, - timeout: Arc>, - #[cfg(target_os = "linux")] - thread_id: Arc>>, - #[cfg(target_os = "windows")] - partition_handle: Arc>>, - running: Arc, - #[cfg(target_os = "linux")] - run_cancelled: Arc>, -} - -impl HvHandlerExecVars { - /// Sets the `join_handle`, to be called `thread::spawn` in `start_hypervisor_handler`. - fn set_join_handle(&mut self, join_handle: JoinHandle>) -> Result<()> { - *self - .join_handle - .try_lock() - .map_err(|_| new_error!("Failed to set_join_handle"))? = Some(join_handle); - - Ok(()) - } - - #[cfg(target_os = "linux")] - fn set_thread_id(&mut self, thread_id: libc::pthread_t) -> Result<()> { - *self - .thread_id - .try_lock() - .map_err(|_| new_error!("Failed to set_thread_id"))? = Some(thread_id); - - Ok(()) - } - - #[cfg(target_os = "linux")] - fn get_thread_id(&self) -> Result { - (*self - .thread_id - .try_lock() - .map_err(|_| new_error!("Failed to get_thread_id"))?) - .ok_or_else(|| new_error!("thread_id not set")) - } - - #[cfg(target_os = "windows")] - fn set_partition_handle(&mut self, partition_handle: WHV_PARTITION_HANDLE) -> Result<()> { - *self - .partition_handle - .try_lock() - .map_err(|_| new_error!("Failed to set_partition_handle"))? 
= Some(partition_handle); - - Ok(()) - } - - #[cfg(target_os = "windows")] - fn get_partition_handle(&self) -> Result> { - Ok(*self - .partition_handle - .try_lock() - .map_err(|_| new_error!("Failed to get_partition_handle"))?) - } - - fn set_timeout(&mut self, timeout: Duration) -> Result<()> { - *self - .timeout - .try_lock() - .map_err(|_| new_error!("Failed to set_timeout"))? = timeout; - - Ok(()) - } - - fn get_timeout(&self) -> Result { - Ok(*self - .timeout - .try_lock() - .map_err(|_| new_error!("Failed to get_timeout"))?) - } -} - -#[derive(Clone)] -struct HvHandlerCommChannels { - to_handler_tx: HypervisorHandlerTx, - to_handler_rx: HypervisorHandlerRx, - from_handler_tx: HandlerMsgTx, - from_handler_rx: HandlerMsgRx, -} - -#[derive(Clone)] -pub(crate) struct HvHandlerConfig { - pub(crate) peb_addr: RawPtr, - pub(crate) seed: u64, - pub(crate) page_size: u32, - pub(crate) dispatch_function_addr: Arc>>, - pub(crate) max_init_time: Duration, - pub(crate) max_exec_time: Duration, - pub(crate) outb_handler: OutBHandlerWrapper, - pub(crate) mem_access_handler: MemAccessHandlerWrapper, - pub(crate) max_wait_for_cancellation: Duration, - pub(crate) max_guest_log_level: Option, - #[cfg(gdb)] - pub(crate) dbg_mem_access_handler: DbgMemAccessHandlerWrapper, -} - -impl HypervisorHandler { - /// Creates a new Hypervisor Handler with a given configuration. This call must precede a call - /// to `start_hypervisor_handler`. 
- pub(crate) fn new(configuration: HvHandlerConfig) -> Self { - let (to_handler_tx, to_handler_rx) = crossbeam_channel::unbounded(); - let (from_handler_tx, from_handler_rx) = crossbeam_channel::unbounded(); - - let communication_channels = HvHandlerCommChannels { - to_handler_tx, - to_handler_rx, - from_handler_tx, - from_handler_rx, - }; - - let execution_variables = HvHandlerExecVars { - join_handle: Arc::new(Mutex::new(None)), - shm: Arc::new(Mutex::new(None)), - #[cfg(target_os = "linux")] - thread_id: Arc::new(Mutex::new(None)), - #[cfg(target_os = "windows")] - partition_handle: Arc::new(Mutex::new(None)), - running: Arc::new(AtomicBool::new(false)), - #[cfg(target_os = "linux")] - run_cancelled: Arc::new(AtomicCell::new(false)), - timeout: Arc::new(Mutex::new(configuration.max_init_time)), - }; - - Self { - communication_channels, - configuration, - execution_variables, - } - } - - /// Sets up a Hypervisor 'handler', designed to listen to messages to execute a specific action, - /// such as: - /// - `initialise` resources, - /// - `dispatch_call_from_host` in the vCPU, and - /// - `terminate_execution` of the vCPU. - /// - /// To send messages to the hypervisor handler thread, use `execute_hypervisor_handler_action`. - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn start_hypervisor_handler( - &mut self, - sandbox_memory_manager: SandboxMemoryManager, - #[cfg(gdb)] debug_info: Option, - ) -> Result<()> { - let configuration = self.configuration.clone(); - - *self - .execution_variables - .shm - .try_lock() - .map_err(|e| new_error!("Failed to lock shm: {}", e))? = Some(sandbox_memory_manager); - - // Other than running initialization and code execution, the handler thread also handles - // cancellation. When we need to cancel the execution there are 2 possible cases - // we have to deal with depending on if the vCPU is currently running or not. - // - // 1. 
If the vCPU is executing, then we need to cancel the execution. - // 2. If the vCPU is not executing, then we need to signal to the thread - // that it should exit the loop. - // - // For the first case, on Linux, we send a signal to the thread running the - // vCPU to interrupt it and cause an EINTR error on the underlying VM run call. - // - // For the second case, we set a flag that is checked on each iteration of the run loop - // and if it is set to true then the loop will exit. - - // On Linux, we have another problem to deal with. The way we terminate a running vCPU - // (case 1 above) is to send a signal to the thread running the vCPU to interrupt it. - // - // There is a possibility that the signal is sent and received just before the thread - // calls run on the vCPU (between the check on the cancelled_run variable and the call to run) - // - see this StackOverflow question for more details - // https://stackoverflow.com/questions/25799667/fixing-race-condition-when-sending-signal-to-interrupt-system-call) - // - // To solve this, we need to keep sending the signal until we know that the spawned thread - // knows it should cancel the execution. - #[cfg(target_os = "linux")] - self.execution_variables.run_cancelled.store(false); - - let to_handler_rx = self.communication_channels.to_handler_rx.clone(); - let mut execution_variables = self.execution_variables.clone(); - let from_handler_tx = self.communication_channels.from_handler_tx.clone(); - let hv_handler_clone = self.clone(); - - // Hyperlight has two signal handlers: - // (1) for timeouts, and - // (2) for seccomp (when enabled). - // - // This sets up Hyperlight signal handlers for the process, which are chained - // to the existing signal handlers. 
- #[cfg(target_os = "linux")] - setup_signal_handlers()?; - - let join_handle = { - thread::Builder::new() - .name("Hypervisor Handler".to_string()) - .spawn(move || -> Result<()> { - let mut hv: Option> = None; - for action in to_handler_rx { - match action { - HypervisorHandlerAction::Initialise => { - { - hv = Some(set_up_hypervisor_partition( - execution_variables.shm.try_lock().map_err(|e| new_error!("Failed to lock shm: {}", e))?.deref_mut().as_mut().ok_or_else(|| new_error!("shm not set"))?, - #[cfg(gdb)] - &debug_info, - )?); - } - let hv = hv.as_mut().ok_or_else(|| new_error!("Hypervisor not set"))?; - - #[cfg(target_os = "windows")] - execution_variables.set_partition_handle(hv.get_partition_handle())?; - #[cfg(target_os = "linux")] - { - // We cannot use the Killable trait, so we get the `pthread_t` via a libc - // call. - execution_variables.set_thread_id(unsafe { pthread_self() })?; - } - - #[cfg(target_os = "linux")] - execution_variables.run_cancelled.store(false); - - log::info!("Initialising Hypervisor Handler"); - - let mut evar_lock_guard = - execution_variables.shm.try_lock().map_err(|e| { - new_error!( - "Error locking exec var shm lock: {}:{}: {}", - file!(), - line!(), - e - ) - })?; - // This apparently-useless lock is - // needed to ensure the host does not - // make unsynchronized accesses while - // the guest is executing. See the - // documentation for - // GuestSharedMemory::lock. - let mem_lock_guard = evar_lock_guard - .as_mut() - .ok_or_else(|| { - new_error!("guest shm lock: {}:{}:", file!(), line!()) - })? 
- .shared_mem - .lock - .try_read(); - - let res = hv.initialise( - configuration.peb_addr.clone(), - configuration.seed, - configuration.page_size, - configuration.outb_handler.clone(), - configuration.mem_access_handler.clone(), - Some(hv_handler_clone.clone()), - configuration.max_guest_log_level, - #[cfg(gdb)] - configuration.dbg_mem_access_handler.clone(), - ); - drop(mem_lock_guard); - drop(evar_lock_guard); - - execution_variables.running.store(false, Ordering::SeqCst); - - match res { - Ok(_) => { - log::info!("Initialised Hypervisor Handler"); - from_handler_tx - .send(HandlerMsg::FinishedHypervisorHandlerAction) - .map_err(|_| { - HyperlightError::HypervisorHandlerCommunicationFailure() - })?; - } - Err(e) => { - log::info!( - "Error initialising Hypervisor Handler: {:?}", - e - ); - from_handler_tx.send(HandlerMsg::Error(e)).map_err(|_| { - HyperlightError::HypervisorHandlerCommunicationFailure() - })?; - } - } - } - HypervisorHandlerAction::DispatchCallFromHost(function_name) => { - let hv = hv.as_mut().ok_or_else(|| new_error!("Hypervisor not initialized"))?; - - #[cfg(target_os = "linux")] - execution_variables.run_cancelled.store(false); - - info!("Dispatching call from host: {}", function_name); - - let dispatch_function_addr = configuration - .dispatch_function_addr - .clone() - .try_lock() - .map_err(|e| { - new_error!( - "Error locking at {}:{}: {}", - file!(), - line!(), - e - ) - })? - .clone() - .ok_or_else(|| new_error!("Hypervisor not initialized"))?; - - let mut evar_lock_guard = - execution_variables.shm.try_lock().map_err(|e| { - new_error!( - "Error locking exec var shm lock: {}:{}: {}", - file!(), - line!(), - e - ) - })?; - // This apparently-useless lock is - // needed to ensure the host does not - // make unsynchronized accesses while - // the guest is executing. See the - // documentation for - // GuestSharedMemory::lock. 
- let mem_lock_guard = evar_lock_guard - .as_mut() - .ok_or_else(|| { - new_error!("guest shm lock {}:{}", file!(), line!()) - })? - .shared_mem - .lock - .try_read(); - - let res = crate::metrics::maybe_time_and_emit_guest_call( - &function_name, - || { - hv.dispatch_call_from_host( - dispatch_function_addr, - configuration.outb_handler.clone(), - configuration.mem_access_handler.clone(), - Some(hv_handler_clone.clone()), - #[cfg(gdb)] - configuration.dbg_mem_access_handler.clone(), - ) - }, - ); - - drop(mem_lock_guard); - drop(evar_lock_guard); - - execution_variables.running.store(false, Ordering::SeqCst); - - match res { - Ok(_) => { - log::info!( - "Finished dispatching call from host: {}", - function_name - ); - from_handler_tx - .send(HandlerMsg::FinishedHypervisorHandlerAction) - .map_err(|_| { - HyperlightError::HypervisorHandlerCommunicationFailure() - })?; - } - Err(e) => { - log::info!( - "Error dispatching call from host: {}: {:?}", - function_name, - e - ); - from_handler_tx.send(HandlerMsg::Error(e)).map_err(|_| { - HyperlightError::HypervisorHandlerCommunicationFailure() - })?; - } - } - } - HypervisorHandlerAction::TerminateHandlerThread => { - info!("Terminating Hypervisor Handler Thread"); - break; - } - } - } - - // If we make it here, it means the main thread issued a `TerminateHandlerThread` action, - // and we are now exiting the handler thread. - { - from_handler_tx - .send(HandlerMsg::FinishedHypervisorHandlerAction) - .map_err(|_| { - HyperlightError::HypervisorHandlerCommunicationFailure() - })?; - } - - Ok(()) - }) - }; - - self.execution_variables.set_join_handle(join_handle?)?; - - Ok(()) - } - - /// Try `join` on `HypervisorHandler` thread for `timeout` duration. - /// - Before attempting a join, this function checks if execution isn't already finished. - /// Note: This function call takes ownership of the `JoinHandle`. 
- #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn try_join_hypervisor_handler_thread(&mut self) -> Result<()> { - let mut join_handle_guard = self - .execution_variables - .join_handle - .try_lock() - .map_err(|e| new_error!("Error locking at {}:{}: {}", file!(), line!(), e))?; - if let Some(handle) = join_handle_guard.take() { - // check if thread is handle.is_finished for `timeout` - // note: dropping the transmitter in `kill_hypervisor_handler_thread` - // should have caused the thread to finish, in here, we are just syncing. - let now = std::time::Instant::now(); - - while now.elapsed() < self.execution_variables.get_timeout()? { - if handle.is_finished() { - match handle.join() { - // as per docs, join should return immediately and not hang if finished - Ok(Ok(())) => return Ok(()), - Ok(Err(e)) => { - log_then_return!(e); - } - Err(e) => { - log_then_return!(new_error!("{:?}", e)); - } - } - } - sleep(Duration::from_millis(1)); // sleep to not busy wait - } - } - - return Err(HyperlightError::Error( - "Failed to finish Hypervisor handler thread".to_string(), - )); - } - - /// Tries to kill the Hypervisor Handler Thread. - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn kill_hypervisor_handler_thread(&mut self) -> Result<()> { - log::debug!("Killing Hypervisor Handler Thread"); - self.execute_hypervisor_handler_action(HypervisorHandlerAction::TerminateHandlerThread)?; - - self.try_join_hypervisor_handler_thread() - } - - /// Send a message to the Hypervisor Handler and wait for a response. - /// - /// This function should be used for most interactions with the Hypervisor - /// Handler. 
- pub(crate) fn execute_hypervisor_handler_action( - &mut self, - hypervisor_handler_action: HypervisorHandlerAction, - ) -> Result<()> { - log::debug!( - "Sending Hypervisor Handler Action: {:?}", - hypervisor_handler_action - ); - - match hypervisor_handler_action { - HypervisorHandlerAction::Initialise => self - .execution_variables - .set_timeout(self.configuration.max_init_time)?, - HypervisorHandlerAction::DispatchCallFromHost(_) => self - .execution_variables - .set_timeout(self.configuration.max_exec_time)?, - HypervisorHandlerAction::TerminateHandlerThread => self - .execution_variables - .set_timeout(self.configuration.max_init_time)?, - // note: terminate can never hang, so setting the timeout for it is just - // for completion of the match statement, and it is not really needed for - // `TerminateHandlerThread`. - } - - self.set_running(true); - self.communication_channels - .to_handler_tx - .send(hypervisor_handler_action) - .map_err(|_| HyperlightError::HypervisorHandlerCommunicationFailure())?; - - log::debug!("Waiting for Hypervisor Handler Response"); - - self.try_receive_handler_msg() - } - - /// Try to receive a `HandlerMsg` from the Hypervisor Handler Thread. - /// - /// Usually, you should use `execute_hypervisor_handler_action` to send and instantly - /// try to receive a message. - /// - /// This function is only useful when we time out, handle a timeout, - /// and still have to receive after sorting that out without sending - /// an extra message. - pub(crate) fn try_receive_handler_msg(&self) -> Result<()> { - // When gdb debugging is enabled, we don't want to timeout on receiving messages - // from the handler thread, as the thread may be paused by gdb. - // In this case, we will wait indefinitely for a message from the handler thread. - // Note: This applies to all the running sandboxes, not just the one being debugged. 
- #[cfg(gdb)] - let response = self.communication_channels.from_handler_rx.recv(); - #[cfg(not(gdb))] - let response = self - .communication_channels - .from_handler_rx - .recv_timeout(self.execution_variables.get_timeout()?); - - match response { - Ok(msg) => match msg { - HandlerMsg::Error(e) => Err(e), - HandlerMsg::FinishedHypervisorHandlerAction => Ok(()), - }, - Err(_) => { - // If we have timed out it may be that the handler thread returned an error before it sent a message, so rather than just timeout here - // we will try and get the join handle for the thread and if it has finished check to see if it returned an error - // if it did then we will return that error, otherwise we will return the timeout error - // we need to take ownership of the handle to join it - match self - .execution_variables - .join_handle - .try_lock() - .map_err(|_| HyperlightError::HypervisorHandlerMessageReceiveTimedout())? - .take_if(|handle| handle.is_finished()) - { - Some(handle) => { - // If the thread has finished, we try to join it and return the error if it has one - let res = handle.join(); - if res.as_ref().is_ok_and(|inner_res| inner_res.is_err()) { - #[allow(clippy::unwrap_used)] - // We know that the thread has finished and that the inner result is an error, so we can safely unwrap the result and the contained err - return Err(res.unwrap().unwrap_err()); - } - Err(HyperlightError::HypervisorHandlerMessageReceiveTimedout()) - } - None => Err(HyperlightError::HypervisorHandlerMessageReceiveTimedout()), - } - } - } - } - - /// Terminate the execution of the hypervisor handler - /// - /// This function is intended to be called after a guest function called has - /// timed-out (i.e., `from_handler_rx.recv_timeout(timeout).is_err()`). - /// - /// It is possible that, even after we timed-out, the guest function execution will - /// finish. 
If that is the case, this function is fundamentally a NOOP, because it - /// will restore the memory snapshot to the last state, and then re-initialise the - /// accidentally terminated vCPU. - /// - /// This function, usually, will return one of the following HyperlightError's - /// - `ExecutionCanceledByHost` if the execution was successfully terminated, or - /// - `HypervisorHandlerExecutionCancelAttemptOnFinishedExecution` if the execution - /// finished while we tried to terminate it. - /// - /// Hence, common usage of this function would be to match on the result. If you get a - /// `HypervisorHandlerExecutionCancelAttemptOnFinishedExecution`, you can safely ignore - /// retrieve the return value from shared memory. - pub(crate) fn terminate_hypervisor_handler_execution_and_reinitialise( - &mut self, - sandbox_memory_manager: &mut SandboxMemoryManager, - ) -> Result { - { - if !self.execution_variables.running.load(Ordering::SeqCst) { - info!("Execution finished while trying to cancel it"); - return Ok(HypervisorHandlerExecutionCancelAttemptOnFinishedExecution()); - } else { - self.terminate_execution()?; - } - } - - { - sleep(self.configuration.max_wait_for_cancellation); - // check if still running - if self.execution_variables.running.load(Ordering::SeqCst) { - // If we still fail to acquire the hv_lock, this means that - // we had actually timed-out on a host function call as the - // `WHvCancelRunVirtualProcessor` didn't unlock. 
- - log::info!("Tried to cancel guest execution on host function call"); - return Err(GuestExecutionHungOnHostFunctionCall()); - } - } - - // Receive `ExecutionCancelledByHost` or other - let res = match self.try_receive_handler_msg() { - Ok(_) => Ok(new_error!( - "Expected ExecutionCanceledByHost, but received FinishedHypervisorHandlerAction" - )), - Err(e) => match e { - HyperlightError::ExecutionCanceledByHost() => { - Ok(HyperlightError::ExecutionCanceledByHost()) - } - _ => Ok(new_error!( - "Expected ExecutionCanceledByHost, but received: {:?}", - e - )), - }, - }; - - // We cancelled execution, so we restore the state to what it was prior to the bad state - // that caused the timeout. - sandbox_memory_manager.restore_state_from_last_snapshot()?; - - // Re-initialise the vCPU. - // This is 100% needed because, otherwise, all it takes to cause a DoS is for a - // function to timeout as the vCPU will be in a bad state without re-init. - log::debug!("Re-initialising vCPU"); - self.execute_hypervisor_handler_action(HypervisorHandlerAction::Initialise)?; - - res - } - - pub(crate) fn set_dispatch_function_addr( - &mut self, - dispatch_function_addr: RawPtr, - ) -> Result<()> { - *self - .configuration - .dispatch_function_addr - .try_lock() - .map_err(|_| new_error!("Failed to set_dispatch_function_addr"))? 
= - Some(dispatch_function_addr); - - Ok(()) - } - - #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(crate) fn terminate_execution(&self) -> Result<()> { - error!( - "Execution timed out after {} milliseconds , cancelling execution", - self.execution_variables.get_timeout()?.as_millis() - ); - - #[cfg(target_os = "linux")] - { - let thread_id = self.execution_variables.get_thread_id()?; - if thread_id == u64::MAX { - log_then_return!("Failed to get thread id to signal thread"); - } - let mut count: u128 = 0; - // We need to send the signal multiple times in case the thread was between checking if it - // should be cancelled and entering the run loop - - // We cannot do this forever (if the thread is calling a host function that never - // returns we will sit here forever), so use the timeout_wait_to_cancel to limit the number - // of iterations - - let number_of_iterations = - self.configuration.max_wait_for_cancellation.as_micros() / 500; - - while !self.execution_variables.run_cancelled.load() { - count += 1; - - if count > number_of_iterations { - break; - } - - info!( - "Sending signal to thread {} iteration: {}", - thread_id, count - ); - - let ret = unsafe { pthread_kill(thread_id, SIGRTMIN()) }; - // We may get ESRCH if we try to signal a thread that has already exited - if ret < 0 && ret != ESRCH { - log_then_return!("error {} calling pthread_kill", ret); - } - std::thread::sleep(Duration::from_micros(500)); - } - if !self.execution_variables.run_cancelled.load() { - log_then_return!(GuestExecutionHungOnHostFunctionCall()); - } - } - #[cfg(target_os = "windows")] - { - unsafe { - WHvCancelRunVirtualProcessor( - #[allow(clippy::unwrap_used)] - self.execution_variables.get_partition_handle()?.unwrap(), // safe unwrap as we checked is some - 0, - 0, - ) - .map_err(|e| new_error!("Failed to cancel guest execution {:?}", e))?; - } - } - - Ok(()) - } -} - -/// `HypervisorHandlerActions` enumerates the -/// possible actions 
that a Hypervisor -/// handler can execute. -pub enum HypervisorHandlerAction { - /// Initialise the vCPU - Initialise, - /// Execute a function call (String = name) from the host - DispatchCallFromHost(String), - /// Terminate hypervisor handler thread - TerminateHandlerThread, -} - -// Debug impl for HypervisorHandlerAction: -// - just prints the enum variant type name. -impl std::fmt::Debug for HypervisorHandlerAction { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - HypervisorHandlerAction::Initialise => write!(f, "Initialise"), - HypervisorHandlerAction::DispatchCallFromHost(_) => write!(f, "DispatchCallFromHost"), - HypervisorHandlerAction::TerminateHandlerThread => write!(f, "TerminateHandlerThread"), - } - } -} - -/// `HandlerMsg` is structure used by the Hypervisor -/// handler to indicate that the Hypervisor Handler has -/// finished performing an action (i.e., `DispatchCallFromHost`, or -/// `Initialise`). -pub enum HandlerMsg { - FinishedHypervisorHandlerAction, - Error(HyperlightError), -} - -fn set_up_hypervisor_partition( - mgr: &mut SandboxMemoryManager, - #[cfg(gdb)] debug_info: &Option, -) -> Result> { - let mem_size = u64::try_from(mgr.shared_mem.mem_size())?; - let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?; - let rsp_ptr = { - let rsp_u64 = mgr.set_up_shared_memory(mem_size, &mut regions)?; - let rsp_raw = RawPtr::from(rsp_u64); - GuestPtr::try_from(rsp_raw) - }?; - let base_ptr = GuestPtr::try_from(Offset::from(0))?; - let pml4_ptr = { - let pml4_offset_u64 = u64::try_from(SandboxMemoryLayout::PML4_OFFSET)?; - base_ptr + Offset::from(pml4_offset_u64) - }; - let entrypoint_ptr = { - let entrypoint_total_offset = mgr.load_addr.clone() + mgr.entrypoint_offset; - GuestPtr::try_from(entrypoint_total_offset) - }?; - - if base_ptr != pml4_ptr { - log_then_return!( - "Error: base_ptr ({:#?}) does not equal pml4_ptr ({:#?})", - base_ptr, - pml4_ptr - ); - } - if entrypoint_ptr <= pml4_ptr { - 
log_then_return!( - "Error: entrypoint_ptr ({:#?}) is not greater than pml4_ptr ({:#?})", - entrypoint_ptr, - pml4_ptr - ); - } - - // Create gdb thread if gdb is enabled and the configuration is provided - #[cfg(gdb)] - let gdb_conn = if let Some(DebugInfo { port }) = debug_info { - let gdb_conn = create_gdb_thread(*port, unsafe { pthread_self() }); - - // in case the gdb thread creation fails, we still want to continue - // without gdb - match gdb_conn { - Ok(gdb_conn) => Some(gdb_conn), - Err(e) => { - log::error!("Could not create gdb connection: {:#}", e); - - None - } - } - } else { - None - }; - - match *get_available_hypervisor() { - #[cfg(mshv)] - Some(HypervisorType::Mshv) => { - let hv = crate::hypervisor::hyperv_linux::HypervLinuxDriver::new( - regions, - entrypoint_ptr, - rsp_ptr, - pml4_ptr, - #[cfg(gdb)] - gdb_conn, - )?; - Ok(Box::new(hv)) - } - - #[cfg(kvm)] - Some(HypervisorType::Kvm) => { - let hv = crate::hypervisor::kvm::KVMDriver::new( - regions, - pml4_ptr.absolute()?, - entrypoint_ptr.absolute()?, - rsp_ptr.absolute()?, - #[cfg(gdb)] - gdb_conn, - )?; - Ok(Box::new(hv)) - } - - #[cfg(target_os = "windows")] - Some(HypervisorType::Whp) => { - let mmap_file_handle = mgr - .shared_mem - .with_exclusivity(|e| e.get_mmap_file_handle())?; - let hv = crate::hypervisor::hyperv_windows::HypervWindowsDriver::new( - regions, - mgr.shared_mem.raw_mem_size(), // we use raw_* here because windows driver requires 64K aligned addresses, - mgr.shared_mem.raw_ptr() as *mut c_void, // and instead convert it to base_addr where needed in the driver itself - pml4_ptr.absolute()?, - entrypoint_ptr.absolute()?, - rsp_ptr.absolute()?, - HandleWrapper::from(mmap_file_handle), - )?; - Ok(Box::new(hv)) - } - - _ => { - log_then_return!(NoHypervisorFound()); - } - } -} - -#[cfg(test)] -mod tests { - use std::sync::{Arc, Barrier}; - use std::thread; - - use hyperlight_testing::simple_guest_as_string; - - #[cfg(target_os = "windows")] - use 
crate::sandbox::SandboxConfiguration; - use crate::sandbox::WrapperGetter; - use crate::sandbox_state::sandbox::EvolvableSandbox; - use crate::sandbox_state::transition::Noop; - use crate::HyperlightError::HypervisorHandlerExecutionCancelAttemptOnFinishedExecution; - use crate::{ - is_hypervisor_present, GuestBinary, HyperlightError, MultiUseSandbox, Result, - UninitializedSandbox, - }; - - fn create_multi_use_sandbox() -> MultiUseSandbox { - if !is_hypervisor_present() { - panic!("Panic on create_multi_use_sandbox because no hypervisor is present"); - } - - // Tests that use this function seem to fail with timeouts sporadically on windows so timeouts are raised here - - let cfg = { - #[cfg(target_os = "windows")] - { - let mut cfg = SandboxConfiguration::default(); - cfg.set_max_initialization_time(std::time::Duration::from_secs(10)); - cfg.set_max_execution_time(std::time::Duration::from_secs(3)); - Some(cfg) - } - #[cfg(not(target_os = "windows"))] - { - None - } - }; - - let usbox = UninitializedSandbox::new( - GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), - cfg, - ) - .unwrap(); - - usbox.evolve(Noop::default()).unwrap() - } - - #[test] - #[ignore] // this test runs by itself because it uses a lot of system resources - fn create_1000_sandboxes() { - let barrier = Arc::new(Barrier::new(21)); - - let mut handles = vec![]; - - for _ in 0..20 { - let c = barrier.clone(); - - let handle = thread::spawn(move || { - c.wait(); - - for _ in 0..50 { - create_multi_use_sandbox(); - } - }); - - handles.push(handle); - } - - barrier.wait(); - - for handle in handles { - handle.join().unwrap(); - } - } - - #[test] - fn create_10_sandboxes() { - for _ in 0..10 { - create_multi_use_sandbox(); - } - } - - #[test] - fn hello_world() -> Result<()> { - let mut sandbox = create_multi_use_sandbox(); - - let msg = "Hello, World!\n".to_string(); - let res = sandbox.call_guest_function_by_name::("PrintOutput", msg); - - assert!(res.is_ok()); - - 
Ok(()) - } - - #[test] - fn terminate_execution_then_call_another_function() -> Result<()> { - let mut sandbox = create_multi_use_sandbox(); - - let res = sandbox.call_guest_function_by_name::<()>("Spin", ()); - - assert!(res.is_err()); - - match res.err().unwrap() { - HyperlightError::ExecutionCanceledByHost() => {} - _ => panic!("Expected ExecutionTerminated error"), - } - - let res = sandbox.call_guest_function_by_name::("Echo", "a".to_string()); - - assert!(res.is_ok()); - - Ok(()) - } - - #[test] - fn terminate_execution_of_an_already_finished_function_then_call_another_function() -> Result<()> - { - let call_print_output = |sandbox: &mut MultiUseSandbox| { - let msg = "Hello, World!\n".to_string(); - let res = sandbox.call_guest_function_by_name::("PrintOutput", msg); - - assert!(res.is_ok()); - }; - - let mut sandbox = create_multi_use_sandbox(); - call_print_output(&mut sandbox); - - // this simulates what would happen if a function actually successfully - // finished while we attempted to terminate execution - { - match sandbox - .get_hv_handler() - .clone() - .terminate_hypervisor_handler_execution_and_reinitialise( - sandbox.get_mgr_wrapper_mut().unwrap_mgr_mut(), - )? { - HypervisorHandlerExecutionCancelAttemptOnFinishedExecution() => {} - _ => panic!("Expected error demonstrating execution wasn't cancelled properly"), - } - } - - call_print_output(&mut sandbox); - call_print_output(&mut sandbox); - - Ok(()) - } -} diff --git a/src/hyperlight_host/src/hypervisor/kvm.rs b/src/hyperlight_host/src/hypervisor/kvm.rs index b04464611..c7aefebc8 100644 --- a/src/hyperlight_host/src/hypervisor/kvm.rs +++ b/src/hyperlight_host/src/hypervisor/kvm.rs @@ -16,8 +16,10 @@ limitations under the License. 
use std::convert::TryFrom; use std::fmt::Debug; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::Arc; #[cfg(gdb)] -use std::sync::{Arc, Mutex}; +use std::sync::Mutex; use kvm_bindings::{kvm_fpu, kvm_regs, kvm_userspace_memory_region, KVM_MEM_READONLY}; use kvm_ioctls::Cap::UserMemory; @@ -32,12 +34,13 @@ use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, GuestDebug, KvmDebug use super::handlers::DbgMemAccessHandlerWrapper; use super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; use super::{ - HyperlightExit, Hypervisor, VirtualCPU, CR0_AM, CR0_ET, CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, - CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, EFER_LME, EFER_NX, EFER_SCE, + HyperlightExit, Hypervisor, InterruptHandle, LinuxInterruptHandle, VirtualCPU, CR0_AM, CR0_ET, + CR0_MP, CR0_NE, CR0_PE, CR0_PG, CR0_WP, CR4_OSFXSR, CR4_OSXMMEXCPT, CR4_PAE, EFER_LMA, + EFER_LME, EFER_NX, EFER_SCE, }; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; use crate::mem::memory_region::{MemoryRegion, MemoryRegionFlags}; use crate::mem::ptr::{GuestPtr, RawPtr}; +use crate::sandbox::SandboxConfiguration; #[cfg(gdb)] use crate::HyperlightError; use crate::{log_then_return, new_error, Result}; @@ -274,13 +277,14 @@ mod debug { } /// A Hypervisor driver for KVM on Linux -pub(super) struct KVMDriver { +pub(crate) struct KVMDriver { _kvm: Kvm, _vm_fd: VmFd, vcpu_fd: VcpuFd, entrypoint: u64, orig_rsp: GuestPtr, mem_regions: Vec, + interrupt_handle: Arc, #[cfg(gdb)] debug: Option, @@ -293,11 +297,12 @@ impl KVMDriver { /// set. Standard registers will not be set, and `initialise` must /// be called to do so. 
#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] - pub(super) fn new( + pub(crate) fn new( mem_regions: Vec, pml4_addr: u64, entrypoint: u64, rsp: u64, + config: &SandboxConfiguration, #[cfg(gdb)] gdb_conn: Option>, ) -> Result { let kvm = Kvm::new()?; @@ -345,13 +350,20 @@ impl KVMDriver { entrypoint, orig_rsp: rsp_gp, mem_regions, + interrupt_handle: Arc::new(LinuxInterruptHandle { + running: AtomicU64::new(0), + cancel_requested: AtomicBool::new(false), + tid: AtomicU64::new(unsafe { libc::pthread_self() }), + retry_delay: config.get_interrupt_retry_delay(), + dropped: AtomicBool::new(false), + sig_rt_min_offset: config.get_interrupt_vcpu_sigrtmin_offset(), + }), #[cfg(gdb)] debug, #[cfg(gdb)] gdb_conn, }; - Ok(ret) } @@ -406,7 +418,6 @@ impl Hypervisor for KVMDriver { page_size: u32, outb_hdl: OutBHandlerWrapper, mem_access_hdl: MemAccessHandlerWrapper, - hv_handler: Option, max_guest_log_level: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()> { @@ -431,7 +442,6 @@ impl Hypervisor for KVMDriver { VirtualCPU::run( self.as_mut_hypervisor(), - hv_handler, outb_hdl, mem_access_hdl, #[cfg(gdb)] @@ -447,7 +457,6 @@ impl Hypervisor for KVMDriver { dispatch_func_addr: RawPtr, outb_handle_fn: OutBHandlerWrapper, mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()> { // Reset general purpose registers, then set RIP and RSP @@ -470,7 +479,6 @@ impl Hypervisor for KVMDriver { // run VirtualCPU::run( self.as_mut_hypervisor(), - hv_handler, outb_handle_fn, mem_access_fn, #[cfg(gdb)] @@ -513,7 +521,55 @@ impl Hypervisor for KVMDriver { #[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] fn run(&mut self) -> Result { - let exit_reason = self.vcpu_fd.run(); + self.interrupt_handle + .tid + .store(unsafe { libc::pthread_self() as u64 }, Ordering::Relaxed); + // Note: if a `InterruptHandle::kill()` called 
while this thread is **here** + Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + self.interrupt_handle + .set_running_and_increment_generation() + .map_err(|e| { + new_error!( + "Error setting running state and incrementing generation: {}", + e + ) + })?; + // Don't run the vcpu if `cancel_requested` is true + // + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then this is fine since `cancel_requested` is set to true, so we will skip the `VcpuFd::run()` call + let exit_reason = if self + .interrupt_handle + .cancel_requested + .load(Ordering::Relaxed) + { + Err(kvm_ioctls::Error::new(libc::EINTR)) + } else { + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then the vcpu will run, but we will keep sending signals to this thread + // to interrupt it until `running` is set to false. The `vcpu_fd::run()` call will + // return either normally with an exit reason, or from being "kicked" by our signal handler, with an EINTR error, + // both of which are fine. + self.vcpu_fd.run() + }; + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then signals will be sent to this thread until `running` is set to false. + // This is fine since the signal handler is a no-op. + #[allow(unused_variables)] + // The variable is only used when `cfg(not(gdb))`, but the flag needs to be reset always anyway + let cancel_requested = self + .interrupt_handle + .cancel_requested + .swap(false, Ordering::Relaxed); + // Note: if a `InterruptHandle::kill()` called while this thread is **here** + // Then `cancel_requested` will be set to true again, which will cancel the **next vcpu run**. + // Additionally signals will be sent to this thread until `running` is set to false. + // This is fine since the signal handler is a no-op. 
+ self.interrupt_handle.clear_running_bit(); + // At this point, `running` is false so no more signals will be sent to this thread, + // but we may still receive async signals that were sent before this point. + // To prevent those signals from interrupting subsequent calls to `run()` (on other vms!), + // we make sure to check `cancel_requested` before cancelling (see `libc::EINTR` match-arm below). let result = match exit_reason { Ok(VcpuExit::Hlt) => { crate::debug!("KVM - Halt Details : {:#?}", &self); @@ -565,7 +621,15 @@ impl Hypervisor for KVMDriver { libc::EINTR => HyperlightExit::Debug(VcpuStopReason::Interrupt), // we send a signal to the thread to cancel execution this results in EINTR being returned by KVM so we return Cancelled #[cfg(not(gdb))] - libc::EINTR => HyperlightExit::Cancelled(), + libc::EINTR => { + // If cancellation was not requested for this specific vm, the vcpu was interrupted because of stale signal + // that was meant to be delivered to a previous/other vcpu on this same thread, so let's ignore it + if cancel_requested { + HyperlightExit::Cancelled() + } else { + HyperlightExit::Retry() + } + } libc::EAGAIN => HyperlightExit::Retry(), _ => { crate::debug!("KVM Error -Details: Address: {} \n {:#?}", e, &self); @@ -586,6 +650,10 @@ impl Hypervisor for KVMDriver { self as &mut dyn Hypervisor } + fn interrupt_handle(&self) -> Arc { + self.interrupt_handle.clone() + } + #[cfg(crashdump)] fn get_memory_regions(&self) -> &[MemoryRegion] { &self.mem_regions @@ -634,58 +702,8 @@ impl Hypervisor for KVMDriver { } } -#[cfg(test)] -mod tests { - use std::sync::{Arc, Mutex}; - - #[cfg(gdb)] - use crate::hypervisor::handlers::DbgMemAccessHandlerCaller; - use crate::hypervisor::handlers::{MemAccessHandler, OutBHandler}; - use crate::hypervisor::tests::test_initialise; - use crate::Result; - - #[cfg(gdb)] - struct DbgMemAccessHandler {} - - #[cfg(gdb)] - impl DbgMemAccessHandlerCaller for DbgMemAccessHandler { - fn read(&mut self, _offset: usize, 
_data: &mut [u8]) -> Result<()> { - Ok(()) - } - - fn write(&mut self, _offset: usize, _data: &[u8]) -> Result<()> { - Ok(()) - } - - fn get_code_offset(&mut self) -> Result { - Ok(0) - } - } - - #[test] - fn test_init() { - if !super::is_hypervisor_present() { - return; - } - - let outb_handler: Arc> = { - let func: Box Result<()> + Send> = - Box::new(|_, _| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(OutBHandler::from(func))) - }; - let mem_access_handler = { - let func: Box Result<()> + Send> = Box::new(|| -> Result<()> { Ok(()) }); - Arc::new(Mutex::new(MemAccessHandler::from(func))) - }; - #[cfg(gdb)] - let dbg_mem_access_handler = Arc::new(Mutex::new(DbgMemAccessHandler {})); - - test_initialise( - outb_handler, - mem_access_handler, - #[cfg(gdb)] - dbg_mem_access_handler, - ) - .unwrap(); +impl Drop for KVMDriver { + fn drop(&mut self) { + self.interrupt_handle.dropped.store(true, Ordering::Relaxed); } } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 5f3aea052..105e59d66 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -33,11 +33,10 @@ pub mod hyperv_linux; #[cfg(target_os = "windows")] /// Hyperv-on-windows functionality pub(crate) mod hyperv_windows; -pub(crate) mod hypervisor_handler; /// GDB debugging support #[cfg(gdb)] -mod gdb; +pub(crate) mod gdb; #[cfg(kvm)] /// Functionality to manipulate KVM-based virtual machines @@ -60,7 +59,11 @@ pub(crate) mod crashdump; use std::fmt::Debug; use std::str::FromStr; +#[cfg(any(kvm, mshv))] +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; +#[cfg(any(kvm, mshv))] +use std::time::Duration; #[cfg(gdb)] use gdb::VcpuStopReason; @@ -70,7 +73,6 @@ use self::handlers::{DbgMemAccessHandlerCaller, DbgMemAccessHandlerWrapper}; use self::handlers::{ MemAccessHandlerCaller, MemAccessHandlerWrapper, OutBHandlerCaller, OutBHandlerWrapper, }; -use 
crate::hypervisor::hypervisor_handler::HypervisorHandler; use crate::mem::ptr::RawPtr; pub(crate) const CR4_PAE: u64 = 1 << 5; @@ -111,10 +113,6 @@ pub enum HyperlightExit { } /// A common set of hypervisor functionality -/// -/// Note: a lot of these structures take in an `Option`. -/// This is because, if we are coming from the C API, we don't have a HypervisorHandler and have -/// to account for the fact the Hypervisor was set up beforehand. pub(crate) trait Hypervisor: Debug + Sync + Send { /// Initialise the internally stored vCPU with the given PEB address and /// random number seed, then run it until a HLT instruction. @@ -126,7 +124,6 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { page_size: u32, outb_handle_fn: OutBHandlerWrapper, mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, guest_max_log_level: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()>; @@ -143,7 +140,6 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { dispatch_func_addr: RawPtr, outb_handle_fn: OutBHandlerWrapper, mem_access_fn: MemAccessHandlerWrapper, - hv_handler: Option, #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> Result<()>; @@ -187,6 +183,9 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { None } + /// Get InterruptHandle to underlying VM + fn interrupt_handle(&self) -> Arc; + /// Get the logging level to pass to the guest entrypoint fn get_max_log_level(&self) -> u32 { // Check to see if the RUST_LOG environment variable is set @@ -227,10 +226,6 @@ pub(crate) trait Hypervisor: Debug + Sync + Send { /// get a mutable trait object from self fn as_mut_hypervisor(&mut self) -> &mut dyn Hypervisor; - /// Get the partition handle for WHP - #[cfg(target_os = "windows")] - fn get_partition_handle(&self) -> windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE; - #[cfg(crashdump)] fn get_memory_regions(&self) -> &[MemoryRegion]; @@ -253,7 +248,6 @@ impl VirtualCPU { #[instrument(err(Debug), skip_all, parent = 
Span::current(), level = "Trace")] pub fn run( hv: &mut dyn Hypervisor, - hv_handler: Option, outb_handle_fn: Arc>, mem_access_fn: Arc>, #[cfg(gdb)] dbg_mem_access_fn: Arc>, @@ -301,13 +295,6 @@ impl VirtualCPU { Ok(HyperlightExit::Cancelled()) => { // Shutdown is returned when the host has cancelled execution // After termination, the main thread will re-initialize the VM - if let Some(hvh) = hv_handler { - // If hvh is None, then we are running from the C API, which doesn't use - // the HypervisorHandler - hvh.set_running(false); - #[cfg(target_os = "linux")] - hvh.set_run_cancelled(true); - } metrics::counter!(METRIC_GUEST_CANCELLATION).increment(1); log_then_return!(ExecutionCanceledByHost()); } @@ -331,84 +318,199 @@ impl VirtualCPU { } } +/// A trait for handling interrupts to a sandbox's vcpu +pub trait InterruptHandle: Send + Sync { + /// Interrupt the corresponding sandbox from running. + /// + /// - If this is called while the vcpu is running, then it will interrupt the vcpu and return `true`. + /// - If this is called while the vcpu is not running, (for example during a host call), the + /// vcpu will not immediately be interrupted, but will prevent the vcpu from running **the next time** + /// it's scheduled, and returns `false`. + /// + /// # Note + /// This function will block for the duration of the time it takes for the vcpu thread to be interrupted. + fn kill(&self) -> bool; + + /// Returns true iff the corresponding sandbox has been dropped + fn dropped(&self) -> bool; +} + +#[cfg(any(kvm, mshv))] +#[derive(Debug)] +pub(super) struct LinuxInterruptHandle { + /// Invariant: vcpu is running => most significant bit (63) of `running` is set. (Neither converse nor inverse is true) + /// + /// Additionally, bit 0-62 tracks how many times the VCPU has been run. Incremented each time `run()` is called. + /// + /// This prevents an ABA problem where: + /// 1. The VCPU is running (generation N), + /// 2. It gets cancelled, + /// 3. 
Then quickly restarted (generation N+1), + /// before the original thread has observed that it was cancelled. + /// + /// Without this generation counter, the interrupt logic might assume the VCPU is still + /// in the *original* run (generation N), see that it's `running`, and re-send the signal. + /// But the new VCPU run (generation N+1) would treat this as a stale signal and ignore it, + /// potentially causing an infinite loop where no effective interrupt is delivered. + /// + /// Invariant: If the VCPU is running, `run_generation[bit 0-62]` matches the current run's generation. + running: AtomicU64, + /// Invariant: vcpu is running => `tid` is the thread on which it is running. + /// Note: multiple vms may have the same `tid`, but at most one vm will have `running` set to true. + tid: AtomicU64, + /// True when an "interruptor" has requested the VM to be cancelled. Set immediately when + /// `kill()` is called, and cleared when the vcpu is no longer running. + /// This is used to + /// 1. make sure stale signals do not interrupt + /// the wrong vcpu (a vcpu may only be interrupted iff `cancel_requested` is true), + /// 2. ensure that if a vm is killed while a host call is running, + /// the vm will not re-enter the guest after the host call returns. + cancel_requested: AtomicBool, + /// Whether the corresponding vm is dropped + dropped: AtomicBool, + /// Retry delay between signals sent to the vcpu thread + retry_delay: Duration, + /// The offset of the SIGRTMIN signal used to interrupt the vcpu thread + sig_rt_min_offset: u8, +} + +#[cfg(any(kvm, mshv))] +impl LinuxInterruptHandle { + const RUNNING_BIT: u64 = 1 << 63; + const MAX_GENERATION: u64 = Self::RUNNING_BIT - 1; + + // set running to true and increment the generation. Generation will wrap around at `MAX_GENERATION`. 
+ fn set_running_and_increment_generation(&self) -> std::result::Result { + self.running + .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |raw| { + let generation = raw & !Self::RUNNING_BIT; + if generation == Self::MAX_GENERATION { + // restart generation from 0 + return Some(Self::RUNNING_BIT); + } + Some((generation + 1) | Self::RUNNING_BIT) + }) + } + + // clear the running bit and return the generation + fn clear_running_bit(&self) -> u64 { + self.running + .fetch_and(!Self::RUNNING_BIT, Ordering::Relaxed) + } + + fn get_running_and_generation(&self) -> (bool, u64) { + let raw = self.running.load(Ordering::Relaxed); + let running = raw & Self::RUNNING_BIT != 0; + let generation = raw & !Self::RUNNING_BIT; + (running, generation) + } +} + +#[cfg(any(kvm, mshv))] +impl InterruptHandle for LinuxInterruptHandle { + fn kill(&self) -> bool { + self.cancel_requested.store(true, Ordering::Relaxed); + + let signal_number = libc::SIGRTMIN() + self.sig_rt_min_offset as libc::c_int; + let mut sent_signal = false; + let mut target_generation: Option = None; + + loop { + let (running, generation) = self.get_running_and_generation(); + + if !running { + break; + } + + match target_generation { + None => target_generation = Some(generation), + // prevent ABA problem + Some(expected) if expected != generation => break, + _ => {} + } + + log::info!("Sending signal to kill vcpu thread..."); + sent_signal = true; + unsafe { + libc::pthread_kill(self.tid.load(Ordering::Relaxed) as _, signal_number); + } + std::thread::sleep(self.retry_delay); + } + + sent_signal + } + fn dropped(&self) -> bool { + self.dropped.load(Ordering::Relaxed) + } +} + #[cfg(all(test, any(target_os = "windows", kvm)))] pub(crate) mod tests { - use std::path::Path; use std::sync::{Arc, Mutex}; - use std::time::Duration; use hyperlight_testing::dummy_guest_as_string; + use super::handlers::{MemAccessHandler, OutBHandler}; #[cfg(gdb)] - use super::handlers::DbgMemAccessHandlerWrapper; - use 
super::handlers::{MemAccessHandlerWrapper, OutBHandlerWrapper}; - use crate::hypervisor::hypervisor_handler::{ - HvHandlerConfig, HypervisorHandler, HypervisorHandlerAction, - }; + use crate::hypervisor::DbgMemAccessHandlerCaller; use crate::mem::ptr::RawPtr; use crate::sandbox::uninitialized::GuestBinary; + use crate::sandbox::uninitialized_evolve::set_up_hypervisor_partition; use crate::sandbox::{SandboxConfiguration, UninitializedSandbox}; - use crate::{new_error, Result}; + use crate::{is_hypervisor_present, new_error, Result}; - pub(crate) fn test_initialise( - outb_hdl: OutBHandlerWrapper, - mem_access_hdl: MemAccessHandlerWrapper, - #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, - ) -> Result<()> { - let filename = dummy_guest_as_string().map_err(|e| new_error!("{}", e))?; - if !Path::new(&filename).exists() { - return Err(new_error!( - "test_initialise: file {} does not exist", - filename - )); + #[cfg(gdb)] + struct DbgMemAccessHandler {} + + #[cfg(gdb)] + impl DbgMemAccessHandlerCaller for DbgMemAccessHandler { + fn read(&mut self, _offset: usize, _data: &mut [u8]) -> Result<()> { + Ok(()) } - let sandbox = UninitializedSandbox::new(GuestBinary::FilePath(filename.clone()), None)?; - let (hshm, gshm) = sandbox.mgr.build(); - drop(hshm); + fn write(&mut self, _offset: usize, _data: &[u8]) -> Result<()> { + Ok(()) + } - let hv_handler_config = HvHandlerConfig { - outb_handler: outb_hdl, - mem_access_handler: mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_handler: dbg_mem_access_fn, - seed: 1234567890, - page_size: 4096, - peb_addr: RawPtr::from(0x230000), - dispatch_function_addr: Arc::new(Mutex::new(None)), - max_init_time: Duration::from_millis( - SandboxConfiguration::DEFAULT_MAX_INITIALIZATION_TIME as u64, - ), - max_exec_time: Duration::from_millis( - SandboxConfiguration::DEFAULT_MAX_EXECUTION_TIME as u64, - ), - max_wait_for_cancellation: Duration::from_millis( - SandboxConfiguration::DEFAULT_MAX_WAIT_FOR_CANCELLATION as u64, - ), - 
max_guest_log_level: None, + fn get_code_offset(&mut self) -> Result { + Ok(0) + } + } + + #[test] + fn test_initialise() -> Result<()> { + if !is_hypervisor_present() { + return Ok(()); + } + + let outb_handler: Arc> = { + let func: Box Result<()> + Send> = + Box::new(|_, _| -> Result<()> { Ok(()) }); + Arc::new(Mutex::new(OutBHandler::from(func))) + }; + let mem_access_handler = { + let func: Box Result<()> + Send> = Box::new(|| -> Result<()> { Ok(()) }); + Arc::new(Mutex::new(MemAccessHandler::from(func))) }; + #[cfg(gdb)] + let dbg_mem_access_handler = Arc::new(Mutex::new(DbgMemAccessHandler {})); - let mut hv_handler = HypervisorHandler::new(hv_handler_config); - - // call initialise on the hypervisor implementation with specific values - // for PEB (process environment block) address, seed and page size. - // - // these values are not actually used, they're just checked inside - // the dummy guest, and if they don't match these values, the dummy - // guest issues a write to an invalid memory address, which in turn - // fails this test. - // - // in this test, we're not actually testing whether a guest can issue - // memory operations, call functions, etc... 
- we're just testing - // whether we can configure the shared memory region, load a binary - // into it, and run the CPU to completion (e.g., a HLT interrupt) - - hv_handler.start_hypervisor_handler( - gshm, - #[cfg(gdb)] - None, - )?; + let filename = dummy_guest_as_string().map_err(|e| new_error!("{}", e))?; - hv_handler.execute_hypervisor_handler_action(HypervisorHandlerAction::Initialise) + let config: SandboxConfiguration = Default::default(); + let sandbox = + UninitializedSandbox::new(GuestBinary::FilePath(filename.clone()), Some(config))?; + let (_hshm, mut gshm) = sandbox.mgr.build(); + let mut vm = set_up_hypervisor_partition(&mut gshm, &config)?; + vm.initialise( + RawPtr::from(0x230000), + 1234567890, + 4096, + outb_handler, + mem_access_handler, + None, + #[cfg(gdb)] + dbg_mem_access_handler, + ) } } diff --git a/src/hyperlight_host/src/mem/shared_mem.rs b/src/hyperlight_host/src/mem/shared_mem.rs index 06cf2b845..4fa5e5ff0 100644 --- a/src/hyperlight_host/src/mem/shared_mem.rs +++ b/src/hyperlight_host/src/mem/shared_mem.rs @@ -136,7 +136,7 @@ pub struct ExclusiveSharedMemory { } unsafe impl Send for ExclusiveSharedMemory {} -/// A GuestSharedMemory is used by the hypervisor handler to represent +/// A GuestSharedMemory is used to represent /// the reference to all-of-memory that is taken by the virtual cpu. 
/// Because of the memory model limitations that affect /// HostSharedMemory, it is likely fairly important (to ensure that diff --git a/src/hyperlight_host/src/metrics/mod.rs b/src/hyperlight_host/src/metrics/mod.rs index 94f15779f..60c0d2965 100644 --- a/src/hyperlight_host/src/metrics/mod.rs +++ b/src/hyperlight_host/src/metrics/mod.rs @@ -85,8 +85,11 @@ pub(crate) fn maybe_time_and_emit_host_call T>( #[cfg(test)] mod tests { + use std::thread; + use std::time::Duration; + use hyperlight_testing::simple_guest_as_string; - use metrics::Key; + use metrics::{with_local_recorder, Key}; use metrics_util::CompositeKey; use super::*; @@ -95,17 +98,10 @@ mod tests { use crate::{GuestBinary, UninitializedSandbox}; #[test] - #[ignore = "This test needs to be run separately to avoid having other tests interfere with it"] fn test_metrics_are_emitted() { - // Set up the recorder and snapshotter let recorder = metrics_util::debugging::DebuggingRecorder::new(); let snapshotter = recorder.snapshotter(); - - // we cannot use with_local_recorder, since that won't capture the metrics - // emitted by the hypervisor-thread (which is all of them) - recorder.install().unwrap(); - - let snapshot = { + let snapshot = with_local_recorder(&recorder, || { let uninit = UninitializedSandbox::new( GuestBinary::FilePath(simple_guest_as_string().unwrap()), None, @@ -113,6 +109,13 @@ mod tests { .unwrap(); let mut multi = uninit.evolve(Noop::default()).unwrap(); + let interrupt_handle = multi.interrupt_handle(); + + // interrupt the guest function call to "Spin" after 1 second + let thread = thread::spawn(move || { + thread::sleep(Duration::from_secs(1)); + assert!(interrupt_handle.kill()); + }); multi .call_guest_function_by_name::("PrintOutput", "Hello".to_string()) @@ -121,9 +124,10 @@ mod tests { multi .call_guest_function_by_name::("Spin", ()) .unwrap_err(); + thread.join().unwrap(); snapshotter.snapshot() - }; + }); // Convert snapshot into a hashmap for easier lookup 
#[expect(clippy::mutable_key_type)] @@ -132,27 +136,17 @@ mod tests { cfg_if::cfg_if! { if #[cfg(feature = "function_call_metrics")] { use metrics::Label; - // Verify that the histogram metrics are recorded correctly - assert_eq!(snapshot.len(), 4, "Expected two metrics in the snapshot"); - // 1. Host print duration - let histogram_key = CompositeKey::new( - metrics_util::MetricKind::Histogram, - Key::from_parts( - METRIC_HOST_FUNC_DURATION, - vec![Label::new("function_name", "HostPrint")], - ), - ); - let histogram_value = &snapshot.get(&histogram_key).unwrap().2; - assert!( - matches!( - histogram_value, - metrics_util::debugging::DebugValue::Histogram(histogram) if histogram.len() == 1 - ), - "Histogram metric does not match expected value" - ); + let expected_num_metrics = if cfg!(all(feature = "seccomp", target_os = "linux")) { + 3 // if seccomp enabled, the host call duration metric is emitted on a separate thread which this local recorder doesn't capture + } else { + 4 + }; - // 2. Guest call duration + // Verify that the histogram metrics are recorded correctly + assert_eq!(snapshot.len(), expected_num_metrics); + + // 1. Guest call duration let histogram_key = CompositeKey::new( metrics_util::MetricKind::Histogram, Key::from_parts( @@ -169,7 +163,7 @@ mod tests { "Histogram metric does not match expected value" ); - // 3. Guest cancellation + // 2. Guest cancellation let counter_key = CompositeKey::new( metrics_util::MetricKind::Counter, Key::from_name(METRIC_GUEST_CANCELLATION), @@ -179,7 +173,7 @@ mod tests { metrics_util::debugging::DebugValue::Counter(1) ); - // 4. Guest call duration + // 3. Guest call duration let histogram_key = CompositeKey::new( metrics_util::MetricKind::Histogram, Key::from_parts( @@ -195,9 +189,28 @@ mod tests { ), "Histogram metric does not match expected value" ); + + if !cfg!(all(feature = "seccomp", target_os = "linux")) { + // 4. 
Host call duration + let histogram_key = CompositeKey::new( + metrics_util::MetricKind::Histogram, + Key::from_parts( + METRIC_HOST_FUNC_DURATION, + vec![Label::new("function_name", "HostPrint")], + ), + ); + let histogram_value = &snapshot.get(&histogram_key).unwrap().2; + assert!( + matches!( + histogram_value, + metrics_util::debugging::DebugValue::Histogram(histogram) if histogram.len() == 1 + ), + "Histogram metric does not match expected value" + ); + } } else { // Verify that the counter metrics are recorded correctly - assert_eq!(snapshot.len(), 1, "Expected two metrics in the snapshot"); + assert_eq!(snapshot.len(), 1); let counter_key = CompositeKey::new( metrics_util::MetricKind::Counter, diff --git a/src/hyperlight_host/src/sandbox/config.rs b/src/hyperlight_host/src/sandbox/config.rs index 9e5f5cacb..5c2746224 100644 --- a/src/hyperlight_host/src/sandbox/config.rs +++ b/src/hyperlight_host/src/sandbox/config.rs @@ -14,9 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ -use std::cmp::{max, min}; +use std::cmp::max; use std::time::Duration; +#[cfg(target_os = "linux")] +use libc::c_int; use tracing::{instrument, Span}; use crate::mem::exe::ExeInfo; @@ -56,31 +58,21 @@ pub struct SandboxConfiguration { /// field should be represented as an `Option`, that type is not /// FFI-safe, so it cannot be. heap_size_override: u64, - /// The max_execution_time of a guest execution in milliseconds. If set to 0, the max_execution_time - /// will be set to the default value of 1000ms if the guest execution does not complete within the time specified - /// then the execution will be cancelled, the minimum value is 1ms + /// Delay between interrupt retries. This duration specifies how long to wait + /// between attempts to send signals to the thread running the sandbox's VCPU. + /// Multiple retries may be necessary because signals only interrupt the VCPU + /// thread when the vcpu thread is in kernel space. 
There's a narrow window during which a + /// signal can be delivered to the thread, but the thread may not yet + /// have entered kernel space. + interrupt_retry_delay: Duration, + /// Offset from `SIGRTMIN` used to determine the signal number for interrupting + /// the VCPU thread. The actual signal sent is `SIGRTMIN + interrupt_vcpu_sigrtmin_offset`. /// - /// Note: this is a C-compatible struct, so even though this optional - /// field should be represented as an `Option`, that type is not - /// FFI-safe, so it cannot be. - /// - max_execution_time: u16, - /// The max_wait_for_cancellation represents the maximum time the host should wait for a guest execution to be cancelled - /// If set to 0, the max_wait_for_cancellation will be set to the default value of 10ms. - /// The minimum value is 1ms. + /// This signal must fall within the valid real-time signal range supported by the host. /// - /// Note: this is a C-compatible struct, so even though this optional - /// field should be represented as an `Option`, that type is not - /// FFI-safe, so it cannot be. - max_wait_for_cancellation: u8, - // The max_initialization_time represents the maximum time the host should wait for a guest to initialize - // If set to 0, the max_initialization_time will be set to the default value of 2000ms. - // The minimum value is 1ms. - // - // Note: this is a C-compatible struct, so even though this optional - // field should be represented as an `Option`, that type is not - // FFI-safe, so it cannot be. - max_initialization_time: u16, + /// Note: Since real-time signals can vary across platforms, ensure that the offset + /// results in a signal number that is not already in use by other components of the system. 
+ interrupt_vcpu_sigrtmin_offset: u8, } impl SandboxConfiguration { @@ -92,24 +84,10 @@ impl SandboxConfiguration { pub const DEFAULT_OUTPUT_SIZE: usize = 0x4000; /// The minimum size of output data pub const MIN_OUTPUT_SIZE: usize = 0x2000; - /// The default value for max initialization time (in milliseconds) - pub const DEFAULT_MAX_INITIALIZATION_TIME: u16 = 2000; - /// The minimum value for max initialization time (in milliseconds) - pub const MIN_MAX_INITIALIZATION_TIME: u16 = 1; - /// The maximum value for max initialization time (in milliseconds) - pub const MAX_MAX_INITIALIZATION_TIME: u16 = u16::MAX; - /// The default and minimum values for max execution time (in milliseconds) - pub const DEFAULT_MAX_EXECUTION_TIME: u16 = 1000; - /// The minimum value for max execution time (in milliseconds) - pub const MIN_MAX_EXECUTION_TIME: u16 = 1; - /// The maximum value for max execution time (in milliseconds) - pub const MAX_MAX_EXECUTION_TIME: u16 = u16::MAX; - /// The default and minimum values for max wait for cancellation (in milliseconds) - pub const DEFAULT_MAX_WAIT_FOR_CANCELLATION: u8 = 100; - /// The minimum value for max wait for cancellation (in milliseconds) - pub const MIN_MAX_WAIT_FOR_CANCELLATION: u8 = 10; - /// The maximum value for max wait for cancellation (in milliseconds) - pub const MAX_MAX_WAIT_FOR_CANCELLATION: u8 = u8::MAX; + /// The default interrupt retry delay + pub const DEFAULT_INTERRUPT_RETRY_DELAY: Duration = Duration::from_micros(500); + /// The default signal offset from `SIGRTMIN` used to determine the signal number for interrupting + pub const INTERRUPT_VCPU_SIGRTMIN_OFFSET: u8 = 0; #[allow(clippy::too_many_arguments)] /// Create a new configuration for a sandbox with the given sizes. 
@@ -119,9 +97,8 @@ impl SandboxConfiguration { output_data_size: usize, stack_size_override: Option, heap_size_override: Option, - max_execution_time: Option, - max_initialization_time: Option, - max_wait_for_cancellation: Option, + interrupt_retry_delay: Duration, + interrupt_vcpu_sigrtmin_offset: u8, #[cfg(gdb)] guest_debug_info: Option, ) -> Self { Self { @@ -129,53 +106,8 @@ impl SandboxConfiguration { output_data_size: max(output_data_size, Self::MIN_OUTPUT_SIZE), stack_size_override: stack_size_override.unwrap_or(0), heap_size_override: heap_size_override.unwrap_or(0), - max_execution_time: { - match max_execution_time { - Some(max_execution_time) => match max_execution_time.as_millis() { - 0 => Self::DEFAULT_MAX_EXECUTION_TIME, - 1.. => min( - Self::MAX_MAX_EXECUTION_TIME.into(), - max( - max_execution_time.as_millis(), - Self::MIN_MAX_EXECUTION_TIME.into(), - ), - ) as u16, - }, - None => Self::DEFAULT_MAX_EXECUTION_TIME, - } - }, - max_wait_for_cancellation: { - match max_wait_for_cancellation { - Some(max_wait_for_cancellation) => { - match max_wait_for_cancellation.as_millis() { - 0 => Self::DEFAULT_MAX_WAIT_FOR_CANCELLATION, - 1.. => min( - Self::MAX_MAX_WAIT_FOR_CANCELLATION.into(), - max( - max_wait_for_cancellation.as_millis(), - Self::MIN_MAX_WAIT_FOR_CANCELLATION.into(), - ), - ) as u8, - } - } - None => Self::DEFAULT_MAX_WAIT_FOR_CANCELLATION, - } - }, - max_initialization_time: { - match max_initialization_time { - Some(max_initialization_time) => match max_initialization_time.as_millis() { - 0 => Self::DEFAULT_MAX_INITIALIZATION_TIME, - 1.. 
=> min( - Self::MAX_MAX_INITIALIZATION_TIME.into(), - max( - max_initialization_time.as_millis(), - Self::MIN_MAX_INITIALIZATION_TIME.into(), - ), - ) as u16, - }, - None => Self::DEFAULT_MAX_INITIALIZATION_TIME, - } - }, + interrupt_retry_delay, + interrupt_vcpu_sigrtmin_offset, #[cfg(gdb)] guest_debug_info, } @@ -207,60 +139,39 @@ impl SandboxConfiguration { self.heap_size_override = heap_size; } - /// Set the maximum execution time of a guest function execution. If set to 0, the max_execution_time - /// will be set to the default value of DEFAULT_MAX_EXECUTION_TIME if the guest execution does not complete within the time specified - /// then the execution will be cancelled, the minimum value is MIN_MAX_EXECUTION_TIME - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub fn set_max_execution_time(&mut self, max_execution_time: Duration) { - match max_execution_time.as_millis() { - 0 => self.max_execution_time = Self::DEFAULT_MAX_EXECUTION_TIME, - 1.. => { - self.max_execution_time = min( - Self::MAX_MAX_EXECUTION_TIME.into(), - max( - max_execution_time.as_millis(), - Self::MIN_MAX_EXECUTION_TIME.into(), - ), - ) as u16 - } - } + /// Sets the interrupt retry delay + pub fn set_interrupt_retry_delay(&mut self, delay: Duration) { + self.interrupt_retry_delay = delay; } - /// Set the maximum time to wait for guest execution calculation. If set to 0, the maximum cancellation time - /// will be set to the default value of DEFAULT_MAX_WAIT_FOR_CANCELLATION if the guest execution cancellation does not complete within the time specified - /// then an error will be returned, the minimum value is MIN_MAX_WAIT_FOR_CANCELLATION - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub fn set_max_execution_cancel_wait_time(&mut self, max_wait_for_cancellation: Duration) { - match max_wait_for_cancellation.as_millis() { - 0 => self.max_wait_for_cancellation = Self::DEFAULT_MAX_WAIT_FOR_CANCELLATION, - 1.. 
=> { - self.max_wait_for_cancellation = min( - Self::MAX_MAX_WAIT_FOR_CANCELLATION.into(), - max( - max_wait_for_cancellation.as_millis(), - Self::MIN_MAX_WAIT_FOR_CANCELLATION.into(), - ), - ) as u8 - } - } + /// Get the delay between retries for interrupts + pub fn get_interrupt_retry_delay(&self) -> Duration { + self.interrupt_retry_delay } - /// Set the maximum time to wait for guest initialization. If set to 0, the maximum initialization time - /// will be set to the default value of DEFAULT_MAX_INITIALIZATION_TIME if the guest initialization does not complete within the time specified - /// then an error will be returned, the minimum value is MIN_MAX_INITIALIZATION_TIME - pub fn set_max_initialization_time(&mut self, max_initialization_time: Duration) { - match max_initialization_time.as_millis() { - 0 => self.max_initialization_time = Self::DEFAULT_MAX_INITIALIZATION_TIME, - 1.. => { - self.max_initialization_time = min( - Self::MAX_MAX_INITIALIZATION_TIME.into(), - max( - max_initialization_time.as_millis(), - Self::MIN_MAX_INITIALIZATION_TIME.into(), - ), - ) as u16 - } + /// Get the signal offset from `SIGRTMIN` used to determine the signal number for interrupting the VCPU thread + #[cfg(target_os = "linux")] + pub fn get_interrupt_vcpu_sigrtmin_offset(&self) -> u8 { + self.interrupt_vcpu_sigrtmin_offset + } + + /// Sets the offset from `SIGRTMIN` to determine the real-time signal used for + /// interrupting the VCPU thread. + /// + /// The final signal number is computed as `SIGRTMIN + offset`, and it must fall within + /// the valid range of real-time signals supported by the host system. + /// + /// Returns Ok(()) if the offset is valid, or an error if it exceeds the maximum real-time signal number. + #[cfg(target_os = "linux")] + pub fn set_interrupt_vcpu_sigrtmin_offset(&mut self, offset: u8) -> crate::Result<()> { + if libc::SIGRTMIN() + offset as c_int > libc::SIGRTMAX() { + return Err(crate::new_error!( + "Invalid SIGRTMIN offset: {}. 
It exceeds the maximum real-time signal number.", + offset + )); } + self.interrupt_vcpu_sigrtmin_offset = offset; + Ok(()) } /// Sets the configuration for the guest debug @@ -280,20 +191,6 @@ impl SandboxConfiguration { self.output_data_size } - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_max_execution_time(&self) -> u16 { - self.max_execution_time - } - - #[instrument(skip_all, parent = Span::current(), level= "Trace")] - pub(crate) fn get_max_wait_for_cancellation(&self) -> u8 { - self.max_wait_for_cancellation - } - - pub(crate) fn get_max_initialization_time(&self) -> u16 { - self.max_initialization_time - } - #[cfg(gdb)] #[instrument(skip_all, parent = Span::current(), level= "Trace")] pub(crate) fn get_guest_debug_info(&self) -> Option { @@ -335,9 +232,8 @@ impl Default for SandboxConfiguration { Self::DEFAULT_OUTPUT_SIZE, None, None, - None, - None, - None, + Self::DEFAULT_INTERRUPT_RETRY_DELAY, + Self::INTERRUPT_VCPU_SIGRTMIN_OFFSET, #[cfg(gdb)] None, ) @@ -346,8 +242,6 @@ impl Default for SandboxConfiguration { #[cfg(test)] mod tests { - use std::time::Duration; - use super::SandboxConfiguration; use crate::testing::simple_guest_exe_info; @@ -357,21 +251,13 @@ mod tests { const HEAP_SIZE_OVERRIDE: u64 = 0x50000; const INPUT_DATA_SIZE_OVERRIDE: usize = 0x4000; const OUTPUT_DATA_SIZE_OVERRIDE: usize = 0x4001; - const MAX_EXECUTION_TIME_OVERRIDE: u16 = 1010; - const MAX_WAIT_FOR_CANCELLATION_OVERRIDE: u8 = 200; - const MAX_INITIALIZATION_TIME_OVERRIDE: u16 = 2000; let mut cfg = SandboxConfiguration::new( INPUT_DATA_SIZE_OVERRIDE, OUTPUT_DATA_SIZE_OVERRIDE, Some(STACK_SIZE_OVERRIDE), Some(HEAP_SIZE_OVERRIDE), - Some(Duration::from_millis(MAX_EXECUTION_TIME_OVERRIDE as u64)), - Some(Duration::from_millis( - MAX_INITIALIZATION_TIME_OVERRIDE as u64, - )), - Some(Duration::from_millis( - MAX_WAIT_FOR_CANCELLATION_OVERRIDE as u64, - )), + SandboxConfiguration::DEFAULT_INTERRUPT_RETRY_DELAY, + 
SandboxConfiguration::INTERRUPT_VCPU_SIGRTMIN_OFFSET, #[cfg(gdb)] None, ); @@ -388,15 +274,6 @@ mod tests { assert_eq!(2048, cfg.heap_size_override); assert_eq!(INPUT_DATA_SIZE_OVERRIDE, cfg.input_data_size); assert_eq!(OUTPUT_DATA_SIZE_OVERRIDE, cfg.output_data_size); - assert_eq!(MAX_EXECUTION_TIME_OVERRIDE, cfg.max_execution_time); - assert_eq!( - MAX_WAIT_FOR_CANCELLATION_OVERRIDE, - cfg.max_wait_for_cancellation - ); - assert_eq!( - MAX_WAIT_FOR_CANCELLATION_OVERRIDE, - cfg.max_wait_for_cancellation - ); } #[test] @@ -406,15 +283,8 @@ mod tests { SandboxConfiguration::MIN_OUTPUT_SIZE - 1, None, None, - Some(Duration::from_millis( - SandboxConfiguration::MIN_MAX_EXECUTION_TIME as u64, - )), - Some(Duration::from_millis( - SandboxConfiguration::MIN_MAX_INITIALIZATION_TIME as u64, - )), - Some(Duration::from_millis( - SandboxConfiguration::MIN_MAX_WAIT_FOR_CANCELLATION as u64 - 1, - )), + SandboxConfiguration::DEFAULT_INTERRUPT_RETRY_DELAY, + SandboxConfiguration::INTERRUPT_VCPU_SIGRTMIN_OFFSET, #[cfg(gdb)] None, ); @@ -422,41 +292,12 @@ mod tests { assert_eq!(SandboxConfiguration::MIN_OUTPUT_SIZE, cfg.output_data_size); assert_eq!(0, cfg.stack_size_override); assert_eq!(0, cfg.heap_size_override); - assert_eq!( - SandboxConfiguration::MIN_MAX_EXECUTION_TIME, - cfg.max_execution_time - ); - assert_eq!( - SandboxConfiguration::MIN_MAX_WAIT_FOR_CANCELLATION, - cfg.max_wait_for_cancellation - ); - assert_eq!( - SandboxConfiguration::MIN_MAX_EXECUTION_TIME, - cfg.max_initialization_time - ); cfg.set_input_data_size(SandboxConfiguration::MIN_INPUT_SIZE - 1); cfg.set_output_data_size(SandboxConfiguration::MIN_OUTPUT_SIZE - 1); - cfg.set_max_execution_time(Duration::from_millis( - SandboxConfiguration::MIN_MAX_EXECUTION_TIME as u64, - )); - cfg.set_max_initialization_time(Duration::from_millis( - SandboxConfiguration::MIN_MAX_INITIALIZATION_TIME as u64 - 1, - )); - cfg.set_max_execution_cancel_wait_time(Duration::from_millis( - 
SandboxConfiguration::MIN_MAX_WAIT_FOR_CANCELLATION as u64 - 1, - )); assert_eq!(SandboxConfiguration::MIN_INPUT_SIZE, cfg.input_data_size); assert_eq!(SandboxConfiguration::MIN_OUTPUT_SIZE, cfg.output_data_size); - assert_eq!( - SandboxConfiguration::MIN_MAX_EXECUTION_TIME, - cfg.max_execution_time - ); - assert_eq!( - SandboxConfiguration::MIN_MAX_WAIT_FOR_CANCELLATION, - cfg.max_wait_for_cancellation - ); } mod proptests { @@ -481,27 +322,6 @@ mod tests { prop_assert_eq!(size, cfg.get_output_data_size()); } - #[test] - fn max_execution_time(time in SandboxConfiguration::MIN_MAX_EXECUTION_TIME..=SandboxConfiguration::MIN_MAX_EXECUTION_TIME * 10) { - let mut cfg = SandboxConfiguration::default(); - cfg.set_max_execution_time(std::time::Duration::from_millis(time.into())); - prop_assert_eq!(time, cfg.get_max_execution_time()); - } - - #[test] - fn max_wait_for_cancellation(time in SandboxConfiguration::MIN_MAX_WAIT_FOR_CANCELLATION..=SandboxConfiguration::MIN_MAX_WAIT_FOR_CANCELLATION * 10) { - let mut cfg = SandboxConfiguration::default(); - cfg.set_max_execution_cancel_wait_time(std::time::Duration::from_millis(time.into())); - prop_assert_eq!(time, cfg.get_max_wait_for_cancellation()); - } - - #[test] - fn max_initialization_time(time in SandboxConfiguration::MIN_MAX_INITIALIZATION_TIME..=SandboxConfiguration::MIN_MAX_INITIALIZATION_TIME * 10) { - let mut cfg = SandboxConfiguration::default(); - cfg.set_max_initialization_time(std::time::Duration::from_millis(time.into())); - prop_assert_eq!(time, cfg.get_max_initialization_time()); - } - #[test] fn stack_size_override(size in 0x1000..=0x10000u64) { let mut cfg = SandboxConfiguration::default(); diff --git a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs index 31e4c9568..ce3924baa 100644 --- a/src/hyperlight_host/src/sandbox/initialized_multi_use.rs +++ b/src/hyperlight_host/src/sandbox/initialized_multi_use.rs @@ -16,7 +16,7 @@ limitations 
under the License. use std::sync::{Arc, Mutex}; -#[cfg(feature = "fuzzing")] +use hyperlight_common::flatbuffer_wrappers::function_call::{FunctionCall, FunctionCallType}; use hyperlight_common::flatbuffer_wrappers::function_types::{ ParameterValue, ReturnType, ReturnValue, }; @@ -25,13 +25,18 @@ use tracing::{instrument, Span}; use super::host_funcs::FunctionRegistry; use super::{MemMgrWrapper, WrapperGetter}; use crate::func::call_ctx::MultiUseGuestCallContext; -use crate::func::guest_dispatch::call_function_on_guest; +use crate::func::guest_err::check_for_guest_error; use crate::func::{ParameterTuple, SupportedReturnType}; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; +#[cfg(gdb)] +use crate::hypervisor::handlers::DbgMemAccessHandlerWrapper; +use crate::hypervisor::handlers::{MemAccessHandlerCaller, OutBHandlerCaller}; +use crate::hypervisor::{Hypervisor, InterruptHandle}; +use crate::mem::ptr::RawPtr; use crate::mem::shared_mem::HostSharedMemory; +use crate::metrics::maybe_time_and_emit_guest_call; use crate::sandbox_state::sandbox::{DevolvableSandbox, EvolvableSandbox, Sandbox}; use crate::sandbox_state::transition::{MultiUseContextCallback, Noop}; -use crate::Result; +use crate::{HyperlightError, Result}; /// A sandbox that supports being used Multiple times. /// The implication of being used multiple times is two-fold: @@ -45,26 +50,12 @@ pub struct MultiUseSandbox { // We need to keep a reference to the host functions, even if the compiler marks it as unused. The compiler cannot detect our dynamic usages of the host function in `HyperlightFunction::call`. pub(super) _host_funcs: Arc>, pub(crate) mem_mgr: MemMgrWrapper, - hv_handler: HypervisorHandler, -} - -// We need to implement drop to join the -// threads, because, otherwise, we will -// be leaking a thread with every -// sandbox that is dropped. This was initially -// caught by our benchmarks that created a ton of -// sandboxes and caused the system to run out of -// resources. 
Now, this is covered by the test: -// `create_1000_sandboxes`. -impl Drop for MultiUseSandbox { - fn drop(&mut self) { - match self.hv_handler.kill_hypervisor_handler_thread() { - Ok(_) => {} - Err(e) => { - log::error!("[POTENTIAL THREAD LEAK] Potentially failed to kill hypervisor handler thread when dropping MultiUseSandbox: {:?}", e); - } - } - } + vm: Box, + out_hdl: Arc>, + mem_hdl: Arc>, + dispatch_ptr: RawPtr, + #[cfg(gdb)] + dbg_mem_access_fn: DbgMemAccessHandlerWrapper, } impl MultiUseSandbox { @@ -77,12 +68,21 @@ impl MultiUseSandbox { pub(super) fn from_uninit( host_funcs: Arc>, mgr: MemMgrWrapper, - hv_handler: HypervisorHandler, + vm: Box, + out_hdl: Arc>, + mem_hdl: Arc>, + dispatch_ptr: RawPtr, + #[cfg(gdb)] dbg_mem_access_fn: DbgMemAccessHandlerWrapper, ) -> MultiUseSandbox { Self { _host_funcs: host_funcs, mem_mgr: mgr, - hv_handler, + vm, + out_hdl, + mem_hdl, + dispatch_ptr, + #[cfg(gdb)] + dbg_mem_access_fn, } } @@ -162,9 +162,15 @@ impl MultiUseSandbox { func_name: &str, args: impl ParameterTuple, ) -> Result { - let ret = call_function_on_guest(self, func_name, Output::TYPE, args.into_value()); - self.restore_state()?; - Output::from_value(ret?) + maybe_time_and_emit_guest_call(func_name, || { + let ret = self.call_guest_function_by_name_no_reset( + func_name, + Output::TYPE, + args.into_value(), + ); + self.restore_state()?; + Output::from_value(ret?) 
+ }) } /// This function is kept here for fuzz testing the parameter and return types @@ -176,9 +182,11 @@ impl MultiUseSandbox { ret_type: ReturnType, args: Vec, ) -> Result { - let ret = call_function_on_guest(self, func_name, ret_type, args); - self.restore_state()?; - ret + maybe_time_and_emit_guest_call(func_name, || { + let ret = self.call_guest_function_by_name_no_reset(func_name, ret_type, args); + self.restore_state()?; + ret + }) } /// Restore the Sandbox's state @@ -187,6 +195,49 @@ impl MultiUseSandbox { let mem_mgr = self.mem_mgr.unwrap_mgr_mut(); mem_mgr.restore_state_from_last_snapshot() } + + pub(crate) fn call_guest_function_by_name_no_reset( + &mut self, + function_name: &str, + return_type: ReturnType, + args: Vec, + ) -> Result { + let fc = FunctionCall::new( + function_name.to_string(), + Some(args), + FunctionCallType::Guest, + return_type, + ); + + let buffer: Vec = fc + .try_into() + .map_err(|_| HyperlightError::Error("Failed to serialize FunctionCall".to_string()))?; + + self.get_mgr_wrapper_mut() + .as_mut() + .write_guest_function_call(&buffer)?; + + self.vm.dispatch_call_from_host( + self.dispatch_ptr.clone(), + self.out_hdl.clone(), + self.mem_hdl.clone(), + #[cfg(gdb)] + self.dbg_mem_access_fn.clone(), + )?; + + self.check_stack_guard()?; + check_for_guest_error(self.get_mgr_wrapper_mut())?; + + self.get_mgr_wrapper_mut() + .as_mut() + .get_guest_function_call_result() + } + + /// Get a handle to the interrupt handler for this sandbox, + /// capable of interrupting guest execution. 
+ pub fn interrupt_handle(&self) -> Arc { + self.vm.interrupt_handle() + } } impl WrapperGetter for MultiUseSandbox { @@ -196,12 +247,6 @@ impl WrapperGetter for MultiUseSandbox { fn get_mgr_wrapper_mut(&mut self) -> &mut MemMgrWrapper { &mut self.mem_mgr } - fn get_hv_handler(&self) -> &HypervisorHandler { - &self.hv_handler - } - fn get_hv_handler_mut(&mut self) -> &mut HypervisorHandler { - &mut self.hv_handler - } } impl Sandbox for MultiUseSandbox { @@ -270,13 +315,16 @@ where #[cfg(test)] mod tests { + use std::sync::{Arc, Barrier}; + use std::thread; + use hyperlight_testing::simple_guest_as_string; use crate::func::call_ctx::MultiUseGuestCallContext; use crate::sandbox::SandboxConfiguration; use crate::sandbox_state::sandbox::{DevolvableSandbox, EvolvableSandbox}; use crate::sandbox_state::transition::{MultiUseContextCallback, Noop}; - use crate::{GuestBinary, MultiUseSandbox, UninitializedSandbox}; + use crate::{GuestBinary, HyperlightError, MultiUseSandbox, Result, UninitializedSandbox}; // Tests to ensure that many (1000) function calls can be made in a call context with a small stack (1K) and heap(14K). // This test effectively ensures that the stack is being properly reset after each call and we are not leaking memory in the Guest. @@ -340,4 +388,142 @@ mod tests { let res: i32 = sbox3.call_guest_function_by_name("GetStatic", ()).unwrap(); assert_eq!(res, 0); } + + #[test] + // TODO: Investigate why this test fails with an incorrect error when run alongside other tests + #[ignore] + #[cfg(target_os = "linux")] + fn test_violate_seccomp_filters() -> Result<()> { + fn make_get_pid_syscall() -> Result { + let pid = unsafe { libc::syscall(libc::SYS_getpid) }; + Ok(pid as u64) + } + + // First, run to make sure it fails. 
+ { + let mut usbox = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), + None, + ) + .unwrap(); + + usbox.register("MakeGetpidSyscall", make_get_pid_syscall)?; + + let mut sbox: MultiUseSandbox = usbox.evolve(Noop::default())?; + + let res: Result = sbox.call_guest_function_by_name("ViolateSeccompFilters", ()); + + #[cfg(feature = "seccomp")] + match res { + Ok(_) => panic!("Expected to fail due to seccomp violation"), + Err(e) => match e { + HyperlightError::DisallowedSyscall => {} + _ => panic!("Expected DisallowedSyscall error: {}", e), + }, + } + + #[cfg(not(feature = "seccomp"))] + match res { + Ok(_) => (), + Err(e) => panic!("Expected to succeed without seccomp: {}", e), + } + } + + // Second, run with allowing `SYS_getpid` + #[cfg(feature = "seccomp")] + { + let mut usbox = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), + None, + ) + .unwrap(); + + usbox.register_with_extra_allowed_syscalls( + "MakeGetpidSyscall", + make_get_pid_syscall, + vec![libc::SYS_getpid], + )?; + // ^^^ note, we are allowing SYS_getpid + + let mut sbox: MultiUseSandbox = usbox.evolve(Noop::default())?; + + let res: Result = sbox.call_guest_function_by_name("ViolateSeccompFilters", ()); + + match res { + Ok(_) => {} + Err(e) => panic!("Expected to succeed due to seccomp violation: {}", e), + } + } + + Ok(()) + } + + #[test] + fn test_trigger_exception_on_guest() { + let usbox = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().expect("Guest Binary Missing")), + None, + ) + .unwrap(); + + let mut multi_use_sandbox: MultiUseSandbox = usbox.evolve(Noop::default()).unwrap(); + + let res: Result<()> = multi_use_sandbox.call_guest_function_by_name("TriggerException", ()); + + assert!(res.is_err()); + + match res.unwrap_err() { + HyperlightError::GuestAborted(_, msg) => { + // msg should indicate we got an invalid opcode exception + 
assert!(msg.contains("InvalidOpcode")); + } + e => panic!( + "Expected HyperlightError::GuestExecutionError but got {:?}", + e + ), + } + } + + #[test] + #[ignore] // this test runs by itself because it uses a lot of system resources + fn create_1000_sandboxes() { + let barrier = Arc::new(Barrier::new(21)); + + let mut handles = vec![]; + + for _ in 0..20 { + let c = barrier.clone(); + + let handle = thread::spawn(move || { + c.wait(); + + for _ in 0..50 { + let usbox = UninitializedSandbox::new( + GuestBinary::FilePath( + simple_guest_as_string().expect("Guest Binary Missing"), + ), + None, + ) + .unwrap(); + + let mut multi_use_sandbox: MultiUseSandbox = + usbox.evolve(Noop::default()).unwrap(); + + let res: i32 = multi_use_sandbox + .call_guest_function_by_name("GetStatic", ()) + .unwrap(); + + assert_eq!(res, 0); + } + }); + + handles.push(handle); + } + + barrier.wait(); + + for handle in handles { + handle.join().unwrap(); + } + } } diff --git a/src/hyperlight_host/src/sandbox/mod.rs b/src/hyperlight_host/src/sandbox/mod.rs index 374faf722..b9d8d1dc1 100644 --- a/src/hyperlight_host/src/sandbox/mod.rs +++ b/src/hyperlight_host/src/sandbox/mod.rs @@ -48,7 +48,6 @@ pub use uninitialized::GuestBinary; pub use uninitialized::UninitializedSandbox; use self::mem_mgr::MemMgrWrapper; -use crate::hypervisor::hypervisor_handler::HypervisorHandler; #[cfg(target_os = "windows")] use crate::hypervisor::windows_hypervisor_platform; use crate::mem::shared_mem::HostSharedMemory; @@ -86,9 +85,6 @@ pub(crate) trait WrapperGetter { #[allow(dead_code)] fn get_mgr_wrapper(&self) -> &MemMgrWrapper; fn get_mgr_wrapper_mut(&mut self) -> &mut MemMgrWrapper; - fn get_hv_handler(&self) -> &HypervisorHandler; - #[allow(dead_code)] - fn get_hv_handler_mut(&mut self) -> &mut HypervisorHandler; } #[cfg(test)] diff --git a/src/hyperlight_host/src/sandbox/uninitialized.rs b/src/hyperlight_host/src/sandbox/uninitialized.rs index 44357bc41..8f52c81d3 100644 --- 
a/src/hyperlight_host/src/sandbox/uninitialized.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized.rs @@ -18,13 +18,10 @@ use std::fmt::Debug; use std::option::Option; use std::path::Path; use std::sync::{Arc, Mutex}; -use std::time::Duration; use log::LevelFilter; use tracing::{instrument, Span}; -#[cfg(gdb)] -use super::config::DebugInfo; use super::host_funcs::{default_writer_func, FunctionRegistry}; use super::mem_mgr::MemMgrWrapper; use super::uninitialized_evolve::evolve_impl_multi_use; @@ -67,12 +64,8 @@ pub struct UninitializedSandbox { pub(crate) host_funcs: Arc>, /// The memory manager for the sandbox. pub(crate) mgr: MemMgrWrapper, - pub(crate) max_initialization_time: Duration, - pub(crate) max_execution_time: Duration, - pub(crate) max_wait_for_cancellation: Duration, pub(crate) max_guest_log_level: Option, - #[cfg(gdb)] - pub(crate) debug_info: Option, + pub(crate) config: SandboxConfiguration, } impl crate::sandbox_state::sandbox::UninitializedSandbox for UninitializedSandbox { @@ -164,8 +157,6 @@ impl UninitializedSandbox { let sandbox_cfg = cfg.unwrap_or_default(); - #[cfg(gdb)] - let debug_info = sandbox_cfg.get_guest_debug_info(); let mut mem_mgr_wrapper = { let mut mgr = UninitializedSandbox::load_guest_binary(sandbox_cfg, &guest_binary)?; let stack_guard = Self::create_stack_guard(); @@ -180,16 +171,8 @@ impl UninitializedSandbox { let mut sandbox = Self { host_funcs, mgr: mem_mgr_wrapper, - max_initialization_time: Duration::from_millis( - sandbox_cfg.get_max_initialization_time() as u64, - ), - max_execution_time: Duration::from_millis(sandbox_cfg.get_max_execution_time() as u64), - max_wait_for_cancellation: Duration::from_millis( - sandbox_cfg.get_max_wait_for_cancellation() as u64, - ), max_guest_log_level: None, - #[cfg(gdb)] - debug_info, + config: sandbox_cfg, }; // If we were passed a writer for host print register it otherwise use the default. 
@@ -336,7 +319,6 @@ fn check_windows_version() -> Result<()> { mod tests { use std::sync::mpsc::channel; use std::sync::Arc; - use std::time::Duration; use std::{fs, thread}; use crossbeam_queue::ArrayQueue; @@ -372,8 +354,6 @@ mod tests { cfg.set_output_data_size(0x1000); cfg.set_stack_size(0x1000); cfg.set_heap_size(0x1000); - cfg.set_max_execution_time(Duration::from_millis(1001)); - cfg.set_max_execution_cancel_wait_time(Duration::from_millis(9)); Some(cfg) }; diff --git a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs index 5c371c782..593ac9165 100644 --- a/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs +++ b/src/hyperlight_host/src/sandbox/uninitialized_evolve.rs @@ -14,21 +14,22 @@ See the License for the specific language governing permissions and limitations under the License. */ -use core::time::Duration; use std::sync::{Arc, Mutex}; -use log::LevelFilter; use rand::Rng; use tracing::{instrument, Span}; +use super::hypervisor::{get_available_hypervisor, HypervisorType}; #[cfg(gdb)] use super::mem_access::dbg_mem_access_handler_wrapper; -use crate::hypervisor::hypervisor_handler::{ - HvHandlerConfig, HypervisorHandler, HypervisorHandlerAction, -}; +use super::SandboxConfiguration; +use crate::hypervisor::handlers::{MemAccessHandlerCaller, OutBHandlerCaller}; +use crate::hypervisor::Hypervisor; +use crate::mem::layout::SandboxMemoryLayout; use crate::mem::mgr::SandboxMemoryManager; -use crate::mem::ptr::RawPtr; -use crate::mem::shared_mem::GuestSharedMemory; +use crate::mem::ptr::{GuestPtr, RawPtr}; +use crate::mem::ptr_offset::Offset; +use crate::mem::shared_mem::{GuestSharedMemory, SharedMemory}; #[cfg(gdb)] use crate::sandbox::config::DebugInfo; use crate::sandbox::host_funcs::FunctionRegistry; @@ -36,7 +37,10 @@ use crate::sandbox::mem_access::mem_access_handler_wrapper; use crate::sandbox::outb::outb_handler_wrapper; use crate::sandbox::{HostSharedMemory, MemMgrWrapper}; use 
crate::sandbox_state::sandbox::Sandbox; -use crate::{new_error, MultiUseSandbox, Result, UninitializedSandbox}; +#[cfg(target_os = "linux")] +use crate::signal_handlers::setup_signal_handlers; +use crate::HyperlightError::NoHypervisorFound; +use crate::{log_then_return, new_error, MultiUseSandbox, Result, UninitializedSandbox}; /// The implementation for evolving `UninitializedSandbox`es to /// `Sandbox`es. @@ -58,64 +62,15 @@ where TransformFunc: Fn( Arc>, MemMgrWrapper, - HypervisorHandler, + Box, + Arc>, + Arc>, + RawPtr, ) -> Result, { - let (hshm, gshm) = u_sbox.mgr.build(); - - let hv_handler = { - let mut hv_handler = hv_init( - &hshm, - gshm, - u_sbox.host_funcs.clone(), - u_sbox.max_initialization_time, - u_sbox.max_execution_time, - u_sbox.max_wait_for_cancellation, - u_sbox.max_guest_log_level, - #[cfg(gdb)] - u_sbox.debug_info, - )?; - - { - let dispatch_function_addr = hshm.as_ref().get_pointer_to_dispatch_function()?; - if dispatch_function_addr == 0 { - return Err(new_error!("Dispatch function address is null")); - } - hv_handler.set_dispatch_function_addr(RawPtr::from(dispatch_function_addr))?; - } - - hv_handler - }; - - transform(u_sbox.host_funcs, hshm, hv_handler) -} - -#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] -pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result { - evolve_impl(u_sbox, |hf, mut hshm, hv_handler| { - { - hshm.as_mut().push_state()?; - } - Ok(MultiUseSandbox::from_uninit(hf, hshm, hv_handler)) - }) -} - -#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] -#[allow(clippy::too_many_arguments)] -fn hv_init( - hshm: &MemMgrWrapper, - gshm: SandboxMemoryManager, - host_funcs: Arc>, - max_init_time: Duration, - max_exec_time: Duration, - max_wait_for_cancellation: Duration, - max_guest_log_level: Option, - #[cfg(gdb)] debug_info: Option, -) -> Result { - let outb_hdl = outb_handler_wrapper(hshm.clone(), host_funcs); - let mem_access_hdl = 
mem_access_handler_wrapper(hshm.clone()); - #[cfg(gdb)] - let dbg_mem_access_hdl = dbg_mem_access_handler_wrapper(hshm.clone()); + let (hshm, mut gshm) = u_sbox.mgr.build(); + let mut vm = set_up_hypervisor_partition(&mut gshm, &u_sbox.config)?; + let outb_hdl = outb_handler_wrapper(hshm.clone(), u_sbox.host_funcs.clone()); let seed = { let mut rng = rand::rng(); @@ -125,40 +80,175 @@ fn hv_init( let peb_u64 = u64::try_from(gshm.layout.peb_address)?; RawPtr::from(peb_u64) }; + let page_size = u32::try_from(page_size::get())?; - let hv_handler_config = HvHandlerConfig { - outb_handler: outb_hdl, - mem_access_handler: mem_access_hdl, - #[cfg(gdb)] - dbg_mem_access_handler: dbg_mem_access_hdl, + let mem_access_hdl = mem_access_handler_wrapper(hshm.clone()); + + #[cfg(gdb)] + let dbg_mem_access_hdl = dbg_mem_access_handler_wrapper(hshm.clone()); + + #[cfg(target_os = "linux")] + setup_signal_handlers(&u_sbox.config)?; + + vm.initialise( + peb_addr, seed, page_size, - peb_addr, - dispatch_function_addr: Arc::new(Mutex::new(None)), - max_init_time, - max_exec_time, - max_wait_for_cancellation, - max_guest_log_level, + outb_hdl.clone(), + mem_access_hdl.clone(), + u_sbox.max_guest_log_level, + #[cfg(gdb)] + dbg_mem_access_hdl, + )?; + + let dispatch_function_addr = hshm.as_ref().get_pointer_to_dispatch_function()?; + if dispatch_function_addr == 0 { + return Err(new_error!("Dispatch function address is null")); + } + + transform( + u_sbox.host_funcs, + hshm, + vm, + outb_hdl, + mem_access_hdl, + RawPtr::from(dispatch_function_addr), + ) +} + +#[instrument(err(Debug), skip_all, parent = Span::current(), level = "Trace")] +pub(super) fn evolve_impl_multi_use(u_sbox: UninitializedSandbox) -> Result { + evolve_impl( + u_sbox, + |hf, mut hshm, vm, out_hdl, mem_hdl, dispatch_ptr| { + { + hshm.as_mut().push_state()?; + } + Ok(MultiUseSandbox::from_uninit( + hf, + hshm.clone(), + vm, + out_hdl, + mem_hdl, + dispatch_ptr, + #[cfg(gdb)] + dbg_mem_access_handler_wrapper(hshm), + )) 
+ }, + ) +} + +pub(crate) fn set_up_hypervisor_partition( + mgr: &mut SandboxMemoryManager, + #[cfg_attr(target_os = "windows", allow(unused_variables))] config: &SandboxConfiguration, +) -> Result> { + let mem_size = u64::try_from(mgr.shared_mem.mem_size())?; + let mut regions = mgr.layout.get_memory_regions(&mgr.shared_mem)?; + let rsp_ptr = { + let rsp_u64 = mgr.set_up_shared_memory(mem_size, &mut regions)?; + let rsp_raw = RawPtr::from(rsp_u64); + GuestPtr::try_from(rsp_raw) + }?; + let base_ptr = GuestPtr::try_from(Offset::from(0))?; + let pml4_ptr = { + let pml4_offset_u64 = u64::try_from(SandboxMemoryLayout::PML4_OFFSET)?; + base_ptr + Offset::from(pml4_offset_u64) }; - // Note: `dispatch_function_addr` is set by the Hyperlight guest library, and so it isn't in - // shared memory at this point in time. We will set it after the execution of `hv_init`. + let entrypoint_ptr = { + let entrypoint_total_offset = mgr.load_addr.clone() + mgr.entrypoint_offset; + GuestPtr::try_from(entrypoint_total_offset) + }?; - let mut hv_handler = HypervisorHandler::new(hv_handler_config); + if base_ptr != pml4_ptr { + log_then_return!( + "Error: base_ptr ({:#?}) does not equal pml4_ptr ({:#?})", + base_ptr, + pml4_ptr + ); + } + if entrypoint_ptr <= pml4_ptr { + log_then_return!( + "Error: entrypoint_ptr ({:#?}) is not greater than pml4_ptr ({:#?})", + entrypoint_ptr, + pml4_ptr + ); + } - hv_handler.start_hypervisor_handler( - gshm, - #[cfg(gdb)] - debug_info, - )?; + // Create gdb thread if gdb is enabled and the configuration is provided + #[cfg(gdb)] + let gdb_conn = if let Some(DebugInfo { port }) = config.get_guest_debug_info() { + use crate::hypervisor::gdb::create_gdb_thread; + + let gdb_conn = create_gdb_thread(port, unsafe { libc::pthread_self() }); - hv_handler - .execute_hypervisor_handler_action(HypervisorHandlerAction::Initialise) - .map_err(|exec_e| match hv_handler.kill_hypervisor_handler_thread() { - Ok(_) => exec_e, - Err(kill_e) => new_error!("{}", 
format!("{}, {}", exec_e, kill_e)), - })?; + // in case the gdb thread creation fails, we still want to continue + // without gdb + match gdb_conn { + Ok(gdb_conn) => Some(gdb_conn), + Err(e) => { + log::error!("Could not create gdb connection: {:#}", e); - Ok(hv_handler) + None + } + } + } else { + None + }; + + match *get_available_hypervisor() { + #[cfg(mshv)] + Some(HypervisorType::Mshv) => { + let hv = crate::hypervisor::hyperv_linux::HypervLinuxDriver::new( + regions, + entrypoint_ptr, + rsp_ptr, + pml4_ptr, + config, + #[cfg(gdb)] + gdb_conn, + )?; + Ok(Box::new(hv)) + } + + #[cfg(kvm)] + Some(HypervisorType::Kvm) => { + let hv = crate::hypervisor::kvm::KVMDriver::new( + regions, + pml4_ptr.absolute()?, + entrypoint_ptr.absolute()?, + rsp_ptr.absolute()?, + config, + #[cfg(gdb)] + gdb_conn, + )?; + Ok(Box::new(hv)) + } + + #[cfg(target_os = "windows")] + Some(HypervisorType::Whp) => { + use std::ffi::c_void; + + use crate::hypervisor::wrappers::HandleWrapper; + + let mmap_file_handle = mgr + .shared_mem + .with_exclusivity(|e| e.get_mmap_file_handle())?; + let hv = crate::hypervisor::hyperv_windows::HypervWindowsDriver::new( + regions, + mgr.shared_mem.raw_mem_size(), // we use raw_* here because windows driver requires 64K aligned addresses, + mgr.shared_mem.raw_ptr() as *mut c_void, // and instead convert it to base_addr where needed in the driver itself + pml4_ptr.absolute()?, + entrypoint_ptr.absolute()?, + rsp_ptr.absolute()?, + HandleWrapper::from(mmap_file_handle), + )?; + Ok(Box::new(hv)) + } + + _ => { + log_then_return!(NoHypervisorFound()); + } + } } #[cfg(test)] diff --git a/src/hyperlight_host/src/seccomp/guest.rs b/src/hyperlight_host/src/seccomp/guest.rs index a5b693351..df512fe34 100644 --- a/src/hyperlight_host/src/seccomp/guest.rs +++ b/src/hyperlight_host/src/seccomp/guest.rs @@ -63,8 +63,7 @@ fn syscalls_allowlist() -> Result)>> { } /// Creates a `BpfProgram` for a `SeccompFilter` over specific syscalls/`SeccompRule`s -/// intended to be 
applied in the Hypervisor Handler thread - i.e., over untrusted guest code -/// execution. +/// intended to be applied on host function threads. /// /// Note: This does not provide coverage over the Hyperlight host, which is why we don't need /// `SeccompRules` for operations we definitely perform but are outside the handler thread diff --git a/src/hyperlight_host/src/signal_handlers/mod.rs b/src/hyperlight_host/src/signal_handlers/mod.rs index bc8f02cf2..bf35d499d 100644 --- a/src/hyperlight_host/src/signal_handlers/mod.rs +++ b/src/hyperlight_host/src/signal_handlers/mod.rs @@ -14,10 +14,14 @@ See the License for the specific language governing permissions and limitations under the License. */ +use libc::c_int; + +use crate::sandbox::SandboxConfiguration; + #[cfg(feature = "seccomp")] pub mod sigsys_signal_handler; -pub(crate) fn setup_signal_handlers() -> crate::Result<()> { +pub(crate) fn setup_signal_handlers(config: &SandboxConfiguration) -> crate::Result<()> { // This is unsafe because signal handlers only allow a very restrictive set of // functions (i.e., async-signal-safe functions) to be executed inside them. // Anything that performs memory allocations, locks, and others are non-async-signal-safe. @@ -45,7 +49,10 @@ pub(crate) fn setup_signal_handlers() -> crate::Result<()> { original_hook(panic_info); })); } - vmm_sys_util::signal::register_signal_handler(libc::SIGRTMIN(), handle_hltimeout)?; + vmm_sys_util::signal::register_signal_handler( + libc::SIGRTMIN() + config.get_interrupt_vcpu_sigrtmin_offset() as c_int, + vm_kill_signal, + )?; // Note: For libraries registering signal handlers, it's important to keep in mind that // the user of the library could have their own signal handlers that we don't want to @@ -60,6 +67,6 @@ pub(crate) fn setup_signal_handlers() -> crate::Result<()> { Ok(()) } -extern "C" fn handle_hltimeout(_: libc::c_int, _: *mut libc::siginfo_t, _: *mut libc::c_void) { - // Do nothing. 
SIGRTMIN is just used to issue a VM exit to the underlying VMM. +extern "C" fn vm_kill_signal(_: libc::c_int, _: *mut libc::siginfo_t, _: *mut libc::c_void) { + // Do nothing. SIGRTMIN is just used to issue a VM exit to the underlying VM. } diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 885b413fa..edd6a6b51 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -14,6 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #![allow(clippy::disallowed_macros)] +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::{Arc, Barrier}; +use std::thread; +use std::time::Duration; + use hyperlight_common::flatbuffer_wrappers::guest_error::ErrorCode; use hyperlight_common::mem::PAGE_SIZE; use hyperlight_host::sandbox::SandboxConfiguration; @@ -21,12 +26,307 @@ use hyperlight_host::sandbox_state::sandbox::EvolvableSandbox; use hyperlight_host::sandbox_state::transition::Noop; use hyperlight_host::{GuestBinary, HyperlightError, MultiUseSandbox, UninitializedSandbox}; use hyperlight_testing::simplelogger::{SimpleLogger, LOGGER}; -use hyperlight_testing::{c_simple_guest_as_string, simple_guest_as_string}; +use hyperlight_testing::{ + c_simple_guest_as_string, callback_guest_as_string, simple_guest_as_string, +}; use log::LevelFilter; pub mod common; // pub to disable dead_code warning use crate::common::{new_uninit, new_uninit_rust}; +// A host function cannot be interrupted, but we can at least make sure after requesting to interrupt a host call, +// we don't re-enter the guest again once the host call is done +#[test] +fn interrupt_host_call() { + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + + let mut usbox = UninitializedSandbox::new( + GuestBinary::FilePath(callback_guest_as_string().expect("Guest Binary Missing")), + None, + ) + .unwrap(); + + let spin = 
move || { + barrier2.wait(); + thread::sleep(std::time::Duration::from_secs(1)); + Ok(()) + }; + + #[cfg(any(target_os = "windows", not(feature = "seccomp")))] + usbox.register("Spin", spin).unwrap(); + + #[cfg(all(target_os = "linux", feature = "seccomp"))] + usbox + .register_with_extra_allowed_syscalls("Spin", spin, vec![libc::SYS_clock_nanosleep]) + .unwrap(); + + let mut sandbox: MultiUseSandbox = usbox.evolve(Noop::default()).unwrap(); + let interrupt_handle = sandbox.interrupt_handle(); + assert!(!interrupt_handle.dropped()); // not yet dropped + + let thread = thread::spawn({ + move || { + barrier.wait(); // wait for the host function to be entered + interrupt_handle.kill(); // send kill once host call is in progress + } + }); + + let result = sandbox + .call_guest_function_by_name::("CallHostSpin", ()) + .unwrap_err(); + assert!(matches!(result, HyperlightError::ExecutionCanceledByHost())); + + thread.join().unwrap(); +} + +/// Makes sure a running guest call can be interrupted by the host +#[test] +fn interrupt_in_progress_guest_call() { + let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + let interrupt_handle = sbox1.interrupt_handle(); + assert!(!interrupt_handle.dropped()); // not yet dropped + + // kill vm after 1 second + let thread = thread::spawn(move || { + thread::sleep(Duration::from_secs(1)); + assert!(interrupt_handle.kill()); + barrier2.wait(); // wait here until main thread has returned from the interrupted guest call + barrier2.wait(); // wait here until main thread has dropped the sandbox + assert!(interrupt_handle.dropped()); + }); + + let res = sbox1 + .call_guest_function_by_name::("Spin", ()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + + barrier.wait(); + // Make sure we can still call guest functions after the VM was interrupted + sbox1 + 
.call_guest_function_by_name::("Echo", "hello".to_string()) + .unwrap(); + + // drop vm to make sure other thread can detect it + drop(sbox1); + barrier.wait(); + thread.join().expect("Thread should finish"); +} + +/// Makes sure interrupting a vm before the guest call has started also prevents the guest call from being executed +#[test] +fn interrupt_guest_call_in_advance() { + let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + let interrupt_handle = sbox1.interrupt_handle(); + assert!(!interrupt_handle.dropped()); // not yet dropped + + // kill vm before the guest call has started + let thread = thread::spawn(move || { + assert!(!interrupt_handle.kill()); // should return false since vcpu is not running yet + barrier2.wait(); + barrier2.wait(); // wait here until main thread has dropped the sandbox + assert!(interrupt_handle.dropped()); + }); + + barrier.wait(); // wait until `kill()` is called before starting the guest call + let res = sbox1 + .call_guest_function_by_name::("Spin", ()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + + // Make sure we can still call guest functions after the VM was interrupted + sbox1 + .call_guest_function_by_name::("Echo", "hello".to_string()) + .unwrap(); + + // drop vm to make sure other thread can detect it + drop(sbox1); + barrier.wait(); + thread.join().expect("Thread should finish"); +} + +/// Verifies that only the intended sandbox (`sbox2`) is interruptible, +/// even when multiple sandboxes share the same thread. +/// This test runs several interleaved iterations where `sbox2` is interrupted, +/// and ensures that: +/// - `sbox1` and `sbox3` are never affected by the interrupt. +/// - `sbox2` either completes normally or fails with `ExecutionCanceledByHost`. 
+/// +/// This test is not foolproof and may not catch +/// all possible interleavings, but can hopefully increases confidence somewhat. +#[test] +fn interrupt_same_thread() { + let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + + let interrupt_handle = sbox2.interrupt_handle(); + assert!(!interrupt_handle.dropped()); // not yet dropped + + const NUM_ITERS: usize = 500; + + // kill vm after 1 second + let thread = thread::spawn(move || { + for _ in 0..NUM_ITERS { + barrier2.wait(); + interrupt_handle.kill(); + } + }); + + for _ in 0..NUM_ITERS { + barrier.wait(); + sbox1 + .call_guest_function_by_name::("Echo", "hello".to_string()) + .expect("Only sandbox 2 is allowed to be interrupted"); + match sbox2.call_guest_function_by_name::("Echo", "hello".to_string()) { + Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => { + // Only allow successful calls or interrupted. + // The call can be successful in case the call is finished before kill() is called. + } + _ => panic!("Unexpected return"), + }; + sbox3 + .call_guest_function_by_name::("Echo", "hello".to_string()) + .expect("Only sandbox 2 is allowed to be interrupted"); + } + thread.join().expect("Thread should finish"); +} + +/// Same test as above but with no per-iteration barrier, to get more possible interleavings. 
+#[test] +fn interrupt_same_thread_no_barrier() { + let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + let mut sbox3: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + let workload_done = Arc::new(AtomicBool::new(false)); + let workload_done2 = workload_done.clone(); + + let interrupt_handle = sbox2.interrupt_handle(); + assert!(!interrupt_handle.dropped()); // not yet dropped + + const NUM_ITERS: usize = 500; + + // kill vm after 1 second + let thread = thread::spawn(move || { + barrier2.wait(); + while !workload_done2.load(Ordering::Relaxed) { + interrupt_handle.kill(); + } + }); + + barrier.wait(); + for _ in 0..NUM_ITERS { + sbox1 + .call_guest_function_by_name::("Echo", "hello".to_string()) + .expect("Only sandbox 2 is allowed to be interrupted"); + match sbox2.call_guest_function_by_name::("Echo", "hello".to_string()) { + Ok(_) | Err(HyperlightError::ExecutionCanceledByHost()) => { + // Only allow successful calls or interrupted. + // The call can be successful in case the call is finished before kill() is called. 
+ } + _ => panic!("Unexpected return"), + }; + sbox3 + .call_guest_function_by_name::("Echo", "hello".to_string()) + .expect("Only sandbox 2 is allowed to be interrupted"); + } + workload_done.store(true, Ordering::Relaxed); + thread.join().expect("Thread should finish"); +} + +// Verify that a sandbox moved to a different thread after initialization can still be killed, +// and that anther sandbox on the original thread does not get incorrectly killed +#[test] +fn interrupt_moved_sandbox() { + let mut sbox1: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + let mut sbox2: MultiUseSandbox = new_uninit_rust().unwrap().evolve(Noop::default()).unwrap(); + + let interrupt_handle = sbox1.interrupt_handle(); + let interrupt_handle2 = sbox2.interrupt_handle(); + + let barrier = Arc::new(Barrier::new(2)); + let barrier2 = barrier.clone(); + + let thread = thread::spawn(move || { + barrier2.wait(); + let res = sbox1 + .call_guest_function_by_name::("Spin", ()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + }); + + let thread2 = thread::spawn(move || { + barrier.wait(); + thread::sleep(Duration::from_secs(1)); + assert!(interrupt_handle.kill()); + + // make sure this returns true, which means the sandbox wasn't killed incorrectly before + assert!(interrupt_handle2.kill()); + }); + + let res = sbox2 + .call_guest_function_by_name::("Spin", ()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + + thread.join().expect("Thread should finish"); + thread2.join().expect("Thread should finish"); +} + +/// This tests exercises the behavior of killing vcpu with a long retry delay. +/// This will exercise the ABA-problem, where the vcpu could be successfully interrupted, +/// but restarted, before the interruptor-thread has a chance to see that the vcpu was killed. +/// +/// The ABA-problem is solved by introducing run-generation on the vcpu. 
+#[test] +#[cfg(target_os = "linux")] +fn interrupt_custom_signal_no_and_retry_delay() { + let mut config = SandboxConfiguration::default(); + config.set_interrupt_vcpu_sigrtmin_offset(0).unwrap(); + config.set_interrupt_retry_delay(Duration::from_secs(1)); + + let mut sbox1: MultiUseSandbox = UninitializedSandbox::new( + GuestBinary::FilePath(simple_guest_as_string().unwrap()), + Some(config), + ) + .unwrap() + .evolve(Noop::default()) + .unwrap(); + + let interrupt_handle = sbox1.interrupt_handle(); + assert!(!interrupt_handle.dropped()); // not yet dropped + + const NUM_ITERS: usize = 3; + + let thread = thread::spawn(move || { + for _ in 0..NUM_ITERS { + // wait for the guest call to start + thread::sleep(Duration::from_millis(1000)); + interrupt_handle.kill(); + } + }); + + for _ in 0..NUM_ITERS { + let res = sbox1 + .call_guest_function_by_name::("Spin", ()) + .unwrap_err(); + assert!(matches!(res, HyperlightError::ExecutionCanceledByHost())); + // immediately reenter another guest function call after having being cancelled, + // so that the vcpu is running again before the interruptor-thread has a chance to see that the vcpu is not running + } + thread.join().expect("Thread should finish"); +} + #[test] fn print_four_args_c_guest() { let path = c_simple_guest_as_string().unwrap();